1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \
3; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON
4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c  -fixup-byte-word-insts=0 \
5; RUN:   | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
6; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \
7; RUN:    | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
8; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0  \
9; RUN:    | FileCheck %s -check-prefixes=CHECK-I686
10
; Copies one half value from %in to %out with no arithmetic in between.
; Every run is expected to move the 16 bits through xmm0 with a word
; insert/extract (pinsrw/pextrw or the v-prefixed forms); no conversion
; libcall appears in any of the expected sequences.
11define void @test_load_store(ptr %in, ptr %out) #0 {
12; CHECK-LIBCALL-LABEL: test_load_store:
13; CHECK-LIBCALL:       # %bb.0:
14; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
15; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
16; CHECK-LIBCALL-NEXT:    movw %ax, (%rsi)
17; CHECK-LIBCALL-NEXT:    retq
18;
19; BWON-F16C-LABEL: test_load_store:
20; BWON-F16C:       # %bb.0:
21; BWON-F16C-NEXT:    vpinsrw $0, (%rdi), %xmm0, %xmm0
22; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rsi)
23; BWON-F16C-NEXT:    retq
24;
25; CHECK-I686-LABEL: test_load_store:
26; CHECK-I686:       # %bb.0:
27; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
28; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
29; CHECK-I686-NEXT:    pinsrw $0, (%ecx), %xmm0
30; CHECK-I686-NEXT:    pextrw $0, %xmm0, %ecx
31; CHECK-I686-NEXT:    movw %cx, (%eax)
32; CHECK-I686-NEXT:    retl
33  %val = load half, ptr %in
34  store half %val, ptr %out
35  ret void
36}
37
; Loads a half and returns its bit pattern as i16.
; The expected code is a plain 16-bit integer load with no SSE register:
; movzwl on the x86-64 runs that pass -fixup-byte-word-insts=1, and movw on
; the runs that pass -fixup-byte-word-insts=0 (including the i686 run).
38define i16 @test_bitcast_from_half(ptr %addr) #0 {
39; BWON-LABEL: test_bitcast_from_half:
40; BWON:       # %bb.0:
41; BWON-NEXT:    movzwl (%rdi), %eax
42; BWON-NEXT:    retq
43;
44; BWOFF-LABEL: test_bitcast_from_half:
45; BWOFF:       # %bb.0:
46; BWOFF-NEXT:    movw (%rdi), %ax
47; BWOFF-NEXT:    retq
48;
49; CHECK-I686-LABEL: test_bitcast_from_half:
50; CHECK-I686:       # %bb.0:
51; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
52; CHECK-I686-NEXT:    movw (%eax), %ax
53; CHECK-I686-NEXT:    retl
54  %val = load half, ptr %addr
55  %val_int = bitcast half %val to i16
56  ret i16 %val_int
57}
58
; Reinterprets the i16 argument %in as a half and stores it to %addr.
; All x86-64 runs share one expectation: a single 16-bit integer store of
; the incoming argument register. The i686 run loads both stack arguments
; and then performs the same 16-bit store.
59define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 {
60; CHECK-LABEL: test_bitcast_to_half:
61; CHECK:       # %bb.0:
62; CHECK-NEXT:    movw %si, (%rdi)
63; CHECK-NEXT:    retq
64;
65; CHECK-I686-LABEL: test_bitcast_to_half:
66; CHECK-I686:       # %bb.0:
67; CHECK-I686-NEXT:    movw {{[0-9]+}}(%esp), %ax
68; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %ecx
69; CHECK-I686-NEXT:    movw %ax, (%ecx)
70; CHECK-I686-NEXT:    retl
71  %val_fp = bitcast i16 %in to half
72  store half %val_fp, ptr %addr
73  ret void
74}
75
; Loads a half and extends it to float.
; Without F16C the expected code calls __extendhfsf2 (as a tail call on
; x86-64, as a regular call with the operand passed on the stack on i686).
; With F16C the expected code uses vcvtph2ps instead of a libcall.
76define float @test_extend32(ptr %addr) #0 {
77; CHECK-LIBCALL-LABEL: test_extend32:
78; CHECK-LIBCALL:       # %bb.0:
79; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
80; CHECK-LIBCALL-NEXT:    jmp __extendhfsf2@PLT # TAILCALL
81;
82; BWON-F16C-LABEL: test_extend32:
83; BWON-F16C:       # %bb.0:
84; BWON-F16C-NEXT:    movzwl (%rdi), %eax
85; BWON-F16C-NEXT:    vmovd %eax, %xmm0
86; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
87; BWON-F16C-NEXT:    retq
88;
89; CHECK-I686-LABEL: test_extend32:
90; CHECK-I686:       # %bb.0:
91; CHECK-I686-NEXT:    subl $12, %esp
92; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
93; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
94; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
95; CHECK-I686-NEXT:    movw %ax, (%esp)
96; CHECK-I686-NEXT:    calll __extendhfsf2
97; CHECK-I686-NEXT:    addl $12, %esp
98; CHECK-I686-NEXT:    retl
99  %val16 = load half, ptr %addr
100  %val32 = fpext half %val16 to float
101  ret float %val32
102}
103
; Loads a half and extends it to double (the result value is named %val32
; in the IR even though its type is double).
; On x86-64 the expected code goes through float first: __extendhfsf2
; followed by cvtss2sd without F16C, or vcvtph2ps followed by vcvtss2sd
; with F16C. On the i686 run only the __extendhfsf2 call is expected; the
; checks show no separate float-to-double instruction after it.
104define double @test_extend64(ptr %addr) #0 {
105; CHECK-LIBCALL-LABEL: test_extend64:
106; CHECK-LIBCALL:       # %bb.0:
107; CHECK-LIBCALL-NEXT:    pushq %rax
108; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
109; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
110; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
111; CHECK-LIBCALL-NEXT:    popq %rax
112; CHECK-LIBCALL-NEXT:    retq
113;
114; BWON-F16C-LABEL: test_extend64:
115; BWON-F16C:       # %bb.0:
116; BWON-F16C-NEXT:    movzwl (%rdi), %eax
117; BWON-F16C-NEXT:    vmovd %eax, %xmm0
118; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
119; BWON-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
120; BWON-F16C-NEXT:    retq
121;
122; CHECK-I686-LABEL: test_extend64:
123; CHECK-I686:       # %bb.0:
124; CHECK-I686-NEXT:    subl $12, %esp
125; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
126; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
127; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
128; CHECK-I686-NEXT:    movw %ax, (%esp)
129; CHECK-I686-NEXT:    calll __extendhfsf2
130; CHECK-I686-NEXT:    addl $12, %esp
131; CHECK-I686-NEXT:    retl
132  %val16 = load half, ptr %addr
133  %val32 = fpext half %val16 to double
134  ret double %val32
135}
136
; Truncates the float argument %in to half and stores it to %addr.
; Without F16C the expected code calls __truncsfhf2 and then stores the low
; 16 bits of the result (pextrw + movw). With F16C the expected code uses
; vcvtps2ph $4 and stores the low word of the converted register.
137define void @test_trunc32(float %in, ptr %addr) #0 {
138; CHECK-LIBCALL-LABEL: test_trunc32:
139; CHECK-LIBCALL:       # %bb.0:
140; CHECK-LIBCALL-NEXT:    pushq %rbx
141; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
142; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
143; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
144; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
145; CHECK-LIBCALL-NEXT:    popq %rbx
146; CHECK-LIBCALL-NEXT:    retq
147;
148; BWON-F16C-LABEL: test_trunc32:
149; BWON-F16C:       # %bb.0:
150; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
151; BWON-F16C-NEXT:    vmovd %xmm0, %eax
152; BWON-F16C-NEXT:    movw %ax, (%rdi)
153; BWON-F16C-NEXT:    retq
154;
155; CHECK-I686-LABEL: test_trunc32:
156; CHECK-I686:       # %bb.0:
157; CHECK-I686-NEXT:    pushl %esi
158; CHECK-I686-NEXT:    subl $8, %esp
159; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
160; CHECK-I686-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
161; CHECK-I686-NEXT:    movd %xmm0, (%esp)
162; CHECK-I686-NEXT:    calll __truncsfhf2
163; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
164; CHECK-I686-NEXT:    movw %ax, (%esi)
165; CHECK-I686-NEXT:    addl $8, %esp
166; CHECK-I686-NEXT:    popl %esi
167; CHECK-I686-NEXT:    retl
168  %val16 = fptrunc float %in to half
169  store half %val16, ptr %addr
170  ret void
171}
172
; Truncates the double argument %in to half and stores it to %addr.
; Every run, including the F16C one, is expected to call __truncdfhf2; the
; runs differ only in how the 16-bit result is written back (pextrw + movw
; versus a vpextrw straight to memory).
173define void @test_trunc64(double %in, ptr %addr) #0 {
174; CHECK-LIBCALL-LABEL: test_trunc64:
175; CHECK-LIBCALL:       # %bb.0:
176; CHECK-LIBCALL-NEXT:    pushq %rbx
177; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
178; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
179; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
180; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
181; CHECK-LIBCALL-NEXT:    popq %rbx
182; CHECK-LIBCALL-NEXT:    retq
183;
184; BWON-F16C-LABEL: test_trunc64:
185; BWON-F16C:       # %bb.0:
186; BWON-F16C-NEXT:    pushq %rbx
187; BWON-F16C-NEXT:    movq %rdi, %rbx
188; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
189; BWON-F16C-NEXT:    vpextrw $0, %xmm0, (%rbx)
190; BWON-F16C-NEXT:    popq %rbx
191; BWON-F16C-NEXT:    retq
192;
193; CHECK-I686-LABEL: test_trunc64:
194; CHECK-I686:       # %bb.0:
195; CHECK-I686-NEXT:    pushl %esi
196; CHECK-I686-NEXT:    subl $8, %esp
197; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
198; CHECK-I686-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
199; CHECK-I686-NEXT:    movq %xmm0, (%esp)
200; CHECK-I686-NEXT:    calll __truncdfhf2
201; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
202; CHECK-I686-NEXT:    movw %ax, (%esi)
203; CHECK-I686-NEXT:    addl $8, %esp
204; CHECK-I686-NEXT:    popl %esi
205; CHECK-I686-NEXT:    retl
206  %val16 = fptrunc double %in to half
207  store half %val16, ptr %addr
208  ret void
209}
210
; Loads a half and converts it to a signed i64.
; On x86-64 the expected code first extends to float (__extendhfsf2 without
; F16C, vcvtph2ps with F16C) and then uses a single cvttss2si to a 64-bit
; register. On the i686 run the expected code calls __extendhfsf2 and then
; converts with x87 fistpll, with 0xC00 ORed into the x87 control word
; around the store and the saved control word reloaded afterwards.
211define i64 @test_fptosi_i64(ptr %p) #0 {
212; CHECK-LIBCALL-LABEL: test_fptosi_i64:
213; CHECK-LIBCALL:       # %bb.0:
214; CHECK-LIBCALL-NEXT:    pushq %rax
215; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
216; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
217; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
218; CHECK-LIBCALL-NEXT:    popq %rcx
219; CHECK-LIBCALL-NEXT:    retq
220;
221; BWON-F16C-LABEL: test_fptosi_i64:
222; BWON-F16C:       # %bb.0:
223; BWON-F16C-NEXT:    movzwl (%rdi), %eax
224; BWON-F16C-NEXT:    vmovd %eax, %xmm0
225; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
226; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
227; BWON-F16C-NEXT:    retq
228;
229; CHECK-I686-LABEL: test_fptosi_i64:
230; CHECK-I686:       # %bb.0:
231; CHECK-I686-NEXT:    subl $28, %esp
232; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
233; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
234; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
235; CHECK-I686-NEXT:    movw %ax, (%esp)
236; CHECK-I686-NEXT:    calll __extendhfsf2
237; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
238; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
239; CHECK-I686-NEXT:    fnstcw {{[0-9]+}}(%esp)
240; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
241; CHECK-I686-NEXT:    orl $3072, %eax # imm = 0xC00
242; CHECK-I686-NEXT:    movw %ax, {{[0-9]+}}(%esp)
243; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
244; CHECK-I686-NEXT:    fistpll {{[0-9]+}}(%esp)
245; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
246; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
247; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %edx
248; CHECK-I686-NEXT:    addl $28, %esp
249; CHECK-I686-NEXT:    retl
250  %a = load half, ptr %p, align 2
251  %r = fptosi half %a to i64
252  ret i64 %r
253}
254
; Converts the signed i64 argument %a to half and stores it to %p.
; On x86-64 the expected code converts to float with cvtsi2ss and then
; narrows to half (__truncsfhf2 without F16C, vcvtps2ph $4 with F16C).
; On the i686 run the integer is converted to float through x87
; (fildll/fstps) before the __truncsfhf2 call.
255define void @test_sitofp_i64(i64 %a, ptr %p) #0 {
256; CHECK-LIBCALL-LABEL: test_sitofp_i64:
257; CHECK-LIBCALL:       # %bb.0:
258; CHECK-LIBCALL-NEXT:    pushq %rbx
259; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
260; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
261; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
262; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
263; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
264; CHECK-LIBCALL-NEXT:    popq %rbx
265; CHECK-LIBCALL-NEXT:    retq
266;
267; BWON-F16C-LABEL: test_sitofp_i64:
268; BWON-F16C:       # %bb.0:
269; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
270; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
271; BWON-F16C-NEXT:    vmovd %xmm0, %eax
272; BWON-F16C-NEXT:    movw %ax, (%rsi)
273; BWON-F16C-NEXT:    retq
274;
275; CHECK-I686-LABEL: test_sitofp_i64:
276; CHECK-I686:       # %bb.0:
277; CHECK-I686-NEXT:    pushl %esi
278; CHECK-I686-NEXT:    subl $24, %esp
279; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
280; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
281; CHECK-I686-NEXT:    movlps %xmm0, {{[0-9]+}}(%esp)
282; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
283; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
284; CHECK-I686-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
285; CHECK-I686-NEXT:    movd %xmm0, (%esp)
286; CHECK-I686-NEXT:    calll __truncsfhf2
287; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
288; CHECK-I686-NEXT:    movw %ax, (%esi)
289; CHECK-I686-NEXT:    addl $24, %esp
290; CHECK-I686-NEXT:    popl %esi
291; CHECK-I686-NEXT:    retl
292  %r = sitofp i64 %a to half
293  store half %r, ptr %p
294  ret void
295}
296
; Loads a half and converts it to an unsigned i64.
; On x86-64 the expected code extends to float, then performs two
; cvttss2si conversions (the second after subtracting a constant-pool
; value) and merges them with sarq/andq/orq. On the i686 run the expected
; code compares against a loaded constant (ucomiss + branch), conditionally
; subtracts it, converts through x87 fistpll, and XORs a bit shifted to
; position 31 into the high result word.
297define i64 @test_fptoui_i64(ptr %p) #0 {
298; CHECK-LIBCALL-LABEL: test_fptoui_i64:
299; CHECK-LIBCALL:       # %bb.0:
300; CHECK-LIBCALL-NEXT:    pushq %rax
301; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
302; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
303; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rcx
304; CHECK-LIBCALL-NEXT:    movq %rcx, %rdx
305; CHECK-LIBCALL-NEXT:    sarq $63, %rdx
306; CHECK-LIBCALL-NEXT:    subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
307; CHECK-LIBCALL-NEXT:    cvttss2si %xmm0, %rax
308; CHECK-LIBCALL-NEXT:    andq %rdx, %rax
309; CHECK-LIBCALL-NEXT:    orq %rcx, %rax
310; CHECK-LIBCALL-NEXT:    popq %rcx
311; CHECK-LIBCALL-NEXT:    retq
312;
313; BWON-F16C-LABEL: test_fptoui_i64:
314; BWON-F16C:       # %bb.0:
315; BWON-F16C-NEXT:    movzwl (%rdi), %eax
316; BWON-F16C-NEXT:    vmovd %eax, %xmm0
317; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
318; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rcx
319; BWON-F16C-NEXT:    movq %rcx, %rdx
320; BWON-F16C-NEXT:    sarq $63, %rdx
321; BWON-F16C-NEXT:    vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
322; BWON-F16C-NEXT:    vcvttss2si %xmm0, %rax
323; BWON-F16C-NEXT:    andq %rdx, %rax
324; BWON-F16C-NEXT:    orq %rcx, %rax
325; BWON-F16C-NEXT:    retq
326;
327; CHECK-I686-LABEL: test_fptoui_i64:
328; CHECK-I686:       # %bb.0:
329; CHECK-I686-NEXT:    subl $28, %esp
330; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
331; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
332; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
333; CHECK-I686-NEXT:    movw %ax, (%esp)
334; CHECK-I686-NEXT:    calll __extendhfsf2
335; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
336; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
337; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
338; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
339; CHECK-I686-NEXT:    jae .LBB9_2
340; CHECK-I686-NEXT:  # %bb.1:
341; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
342; CHECK-I686-NEXT:  .LBB9_2:
343; CHECK-I686-NEXT:    subss %xmm1, %xmm0
344; CHECK-I686-NEXT:    movss %xmm0, {{[0-9]+}}(%esp)
345; CHECK-I686-NEXT:    setae %al
346; CHECK-I686-NEXT:    flds {{[0-9]+}}(%esp)
347; CHECK-I686-NEXT:    fnstcw {{[0-9]+}}(%esp)
348; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
349; CHECK-I686-NEXT:    orl $3072, %ecx # imm = 0xC00
350; CHECK-I686-NEXT:    movw %cx, {{[0-9]+}}(%esp)
351; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
352; CHECK-I686-NEXT:    fistpll {{[0-9]+}}(%esp)
353; CHECK-I686-NEXT:    fldcw {{[0-9]+}}(%esp)
354; CHECK-I686-NEXT:    movzbl %al, %edx
355; CHECK-I686-NEXT:    shll $31, %edx
356; CHECK-I686-NEXT:    xorl {{[0-9]+}}(%esp), %edx
357; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
358; CHECK-I686-NEXT:    addl $28, %esp
359; CHECK-I686-NEXT:    retl
360  %a = load half, ptr %p, align 2
361  %r = fptoui half %a to i64
362  ret i64 %r
363}
364
; Converts the unsigned i64 argument %a to half and stores it to %p.
; On x86-64 the expected code branches on the sign bit of the input
; (testq/js): the non-negative path converts directly with cvtsi2ss, while
; the other path shifts the value right by one, ORs the original low bit
; back in, converts, and doubles the result with an add. The float is then
; narrowed to half (__truncsfhf2 without F16C, vcvtps2ph $4 with F16C).
; On the i686 run the expected code uses fildll plus an fadds from a
; constant-pool table indexed by the input's top bit (shrl $31), then
; calls __truncsfhf2.
365define void @test_uitofp_i64(i64 %a, ptr %p) #0 {
366; CHECK-LIBCALL-LABEL: test_uitofp_i64:
367; CHECK-LIBCALL:       # %bb.0:
368; CHECK-LIBCALL-NEXT:    pushq %rbx
369; CHECK-LIBCALL-NEXT:    movq %rsi, %rbx
370; CHECK-LIBCALL-NEXT:    testq %rdi, %rdi
371; CHECK-LIBCALL-NEXT:    js .LBB10_1
372; CHECK-LIBCALL-NEXT:  # %bb.2:
373; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
374; CHECK-LIBCALL-NEXT:    jmp .LBB10_3
375; CHECK-LIBCALL-NEXT:  .LBB10_1:
376; CHECK-LIBCALL-NEXT:    movq %rdi, %rax
377; CHECK-LIBCALL-NEXT:    shrq %rax
378; CHECK-LIBCALL-NEXT:    andl $1, %edi
379; CHECK-LIBCALL-NEXT:    orq %rax, %rdi
380; CHECK-LIBCALL-NEXT:    cvtsi2ss %rdi, %xmm0
381; CHECK-LIBCALL-NEXT:    addss %xmm0, %xmm0
382; CHECK-LIBCALL-NEXT:  .LBB10_3:
383; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
384; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
385; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
386; CHECK-LIBCALL-NEXT:    popq %rbx
387; CHECK-LIBCALL-NEXT:    retq
388;
389; BWON-F16C-LABEL: test_uitofp_i64:
390; BWON-F16C:       # %bb.0:
391; BWON-F16C-NEXT:    testq %rdi, %rdi
392; BWON-F16C-NEXT:    js .LBB10_1
393; BWON-F16C-NEXT:  # %bb.2:
394; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
395; BWON-F16C-NEXT:    jmp .LBB10_3
396; BWON-F16C-NEXT:  .LBB10_1:
397; BWON-F16C-NEXT:    movq %rdi, %rax
398; BWON-F16C-NEXT:    shrq %rax
399; BWON-F16C-NEXT:    andl $1, %edi
400; BWON-F16C-NEXT:    orq %rax, %rdi
401; BWON-F16C-NEXT:    vcvtsi2ss %rdi, %xmm0, %xmm0
402; BWON-F16C-NEXT:    vaddss %xmm0, %xmm0, %xmm0
403; BWON-F16C-NEXT:  .LBB10_3:
404; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
405; BWON-F16C-NEXT:    vmovd %xmm0, %eax
406; BWON-F16C-NEXT:    movw %ax, (%rsi)
407; BWON-F16C-NEXT:    retq
408;
409; CHECK-I686-LABEL: test_uitofp_i64:
410; CHECK-I686:       # %bb.0:
411; CHECK-I686-NEXT:    pushl %esi
412; CHECK-I686-NEXT:    subl $24, %esp
413; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
414; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
415; CHECK-I686-NEXT:    movq {{.*#+}} xmm0 = mem[0],zero
416; CHECK-I686-NEXT:    movq %xmm0, {{[0-9]+}}(%esp)
417; CHECK-I686-NEXT:    shrl $31, %eax
418; CHECK-I686-NEXT:    fildll {{[0-9]+}}(%esp)
419; CHECK-I686-NEXT:    fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
420; CHECK-I686-NEXT:    fstps (%esp)
421; CHECK-I686-NEXT:    calll __truncsfhf2
422; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
423; CHECK-I686-NEXT:    movw %ax, (%esi)
424; CHECK-I686-NEXT:    addl $24, %esp
425; CHECK-I686-NEXT:    popl %esi
426; CHECK-I686-NEXT:    retl
427  %r = uitofp i64 %a to half
428  store half %r, ptr %p
429  ret void
430}
431
; Loads a <4 x half> vector and extends it to <4 x float>.
; Without F16C the expected code is scalarized: each of the four elements
; is loaded separately, passed to __extendhfsf2, and the four results are
; recombined (unpcklps/unpcklpd on x86-64, unpcklps/movlhps on i686, where
; intermediate results are spilled with fstpt/fldt). With F16C the whole
; operation is expected to be a single vcvtph2ps from memory.
432define <4 x float> @test_extend32_vec4(ptr %p) #0 {
433; CHECK-LIBCALL-LABEL: test_extend32_vec4:
434; CHECK-LIBCALL:       # %bb.0:
435; CHECK-LIBCALL-NEXT:    subq $72, %rsp
436; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
437; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
438; CHECK-LIBCALL-NEXT:    pinsrw $0, 2(%rdi), %xmm0
439; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
440; CHECK-LIBCALL-NEXT:    pinsrw $0, 4(%rdi), %xmm0
441; CHECK-LIBCALL-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
442; CHECK-LIBCALL-NEXT:    pinsrw $0, 6(%rdi), %xmm0
443; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
444; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
445; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
446; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
447; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
448; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
449; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
450; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
451; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
452; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
453; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
454; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
455; CHECK-LIBCALL-NEXT:    unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
456; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
457; CHECK-LIBCALL-NEXT:    unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
458; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0]
459; CHECK-LIBCALL-NEXT:    addq $72, %rsp
460; CHECK-LIBCALL-NEXT:    retq
461;
462; BWON-F16C-LABEL: test_extend32_vec4:
463; BWON-F16C:       # %bb.0:
464; BWON-F16C-NEXT:    vcvtph2ps (%rdi), %xmm0
465; BWON-F16C-NEXT:    retq
466;
467; CHECK-I686-LABEL: test_extend32_vec4:
468; CHECK-I686:       # %bb.0:
469; CHECK-I686-NEXT:    pushl %esi
470; CHECK-I686-NEXT:    subl $88, %esp
471; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
472; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
473; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
474; CHECK-I686-NEXT:    pinsrw $0, 6(%eax), %xmm0
475; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
476; CHECK-I686-NEXT:    pinsrw $0, 4(%eax), %xmm0
477; CHECK-I686-NEXT:    pinsrw $0, 2(%eax), %xmm1
478; CHECK-I686-NEXT:    pextrw $0, %xmm1, %eax
479; CHECK-I686-NEXT:    movw %ax, (%esp)
480; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
481; CHECK-I686-NEXT:    calll __extendhfsf2
482; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
483; CHECK-I686-NEXT:    movw %si, (%esp)
484; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
485; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
486; CHECK-I686-NEXT:    calll __extendhfsf2
487; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
488; CHECK-I686-NEXT:    movw %si, (%esp)
489; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
490; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
491; CHECK-I686-NEXT:    calll __extendhfsf2
492; CHECK-I686-NEXT:    movw %si, (%esp)
493; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
494; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
495; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
496; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
497; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
498; CHECK-I686-NEXT:    calll __extendhfsf2
499; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
500; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
501; CHECK-I686-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
502; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
503; CHECK-I686-NEXT:    movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
504; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
505; CHECK-I686-NEXT:    unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
506; CHECK-I686-NEXT:    movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
507; CHECK-I686-NEXT:    addl $88, %esp
508; CHECK-I686-NEXT:    popl %esi
509; CHECK-I686-NEXT:    retl
510  %a = load <4 x half>, ptr %p, align 8
511  %b = fpext <4 x half> %a to <4 x float>
512  ret <4 x float> %b
513}
514
; Loads a <4 x half> vector and extends it to <4 x double>.
; Without F16C the expected code is scalarized into four __extendhfsf2
; calls. On x86-64 each result is widened with cvtss2sd and the pairs are
; joined with unpcklpd, leaving the result in xmm0/xmm1. On i686 each
; result is written out as a double with fstpl and the pairs are reloaded
; with movsd/movhps. With F16C the expected code is vcvtph2ps from memory
; followed by vcvtps2pd into ymm0.
515define <4 x double> @test_extend64_vec4(ptr %p) #0 {
516; CHECK-LIBCALL-LABEL: test_extend64_vec4:
517; CHECK-LIBCALL:       # %bb.0:
518; CHECK-LIBCALL-NEXT:    subq $72, %rsp
519; CHECK-LIBCALL-NEXT:    pinsrw $0, 4(%rdi), %xmm0
520; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
521; CHECK-LIBCALL-NEXT:    pinsrw $0, 6(%rdi), %xmm0
522; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
523; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rdi), %xmm0
524; CHECK-LIBCALL-NEXT:    movdqa %xmm0, (%rsp) # 16-byte Spill
525; CHECK-LIBCALL-NEXT:    pinsrw $0, 2(%rdi), %xmm0
526; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
527; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
528; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
529; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
530; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
531; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
532; CHECK-LIBCALL-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
533; CHECK-LIBCALL-NEXT:    # xmm0 = xmm0[0],mem[0]
534; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
535; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
536; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
537; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm0
538; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
539; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
540; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
541; CHECK-LIBCALL-NEXT:    cvtss2sd %xmm0, %xmm1
542; CHECK-LIBCALL-NEXT:    unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
543; CHECK-LIBCALL-NEXT:    # xmm1 = xmm1[0],mem[0]
544; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
545; CHECK-LIBCALL-NEXT:    addq $72, %rsp
546; CHECK-LIBCALL-NEXT:    retq
547;
548; BWON-F16C-LABEL: test_extend64_vec4:
549; BWON-F16C:       # %bb.0:
550; BWON-F16C-NEXT:    vcvtph2ps (%rdi), %xmm0
551; BWON-F16C-NEXT:    vcvtps2pd %xmm0, %ymm0
552; BWON-F16C-NEXT:    retq
553;
554; CHECK-I686-LABEL: test_extend64_vec4:
555; CHECK-I686:       # %bb.0:
556; CHECK-I686-NEXT:    pushl %esi
557; CHECK-I686-NEXT:    subl $104, %esp
558; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
559; CHECK-I686-NEXT:    pinsrw $0, 6(%eax), %xmm0
560; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
561; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
562; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
563; CHECK-I686-NEXT:    pinsrw $0, 2(%eax), %xmm0
564; CHECK-I686-NEXT:    pinsrw $0, 4(%eax), %xmm1
565; CHECK-I686-NEXT:    pextrw $0, %xmm1, %eax
566; CHECK-I686-NEXT:    movw %ax, (%esp)
567; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
568; CHECK-I686-NEXT:    calll __extendhfsf2
569; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
570; CHECK-I686-NEXT:    movw %si, (%esp)
571; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
572; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
573; CHECK-I686-NEXT:    calll __extendhfsf2
574; CHECK-I686-NEXT:    fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
575; CHECK-I686-NEXT:    movw %si, (%esp)
576; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
577; CHECK-I686-NEXT:    pextrw $0, %xmm0, %esi
578; CHECK-I686-NEXT:    calll __extendhfsf2
579; CHECK-I686-NEXT:    movw %si, (%esp)
580; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
581; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
582; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
583; CHECK-I686-NEXT:    fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
584; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
585; CHECK-I686-NEXT:    calll __extendhfsf2
586; CHECK-I686-NEXT:    fstpl {{[0-9]+}}(%esp)
587; CHECK-I686-NEXT:    movsd {{.*#+}} xmm0 = mem[0],zero
588; CHECK-I686-NEXT:    movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
589; CHECK-I686-NEXT:    movsd {{.*#+}} xmm1 = mem[0],zero
590; CHECK-I686-NEXT:    movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
591; CHECK-I686-NEXT:    addl $104, %esp
592; CHECK-I686-NEXT:    popl %esi
593; CHECK-I686-NEXT:    retl
594  %a = load <4 x half>, ptr %p, align 8
595  %b = fpext <4 x half> %a to <4 x double>
596  ret <4 x double> %b
597}
598
; Truncates the <4 x float> argument %a to <4 x half> and stores it to %p.
; Without F16C the expected code is scalarized: the input vector is
; spilled, each lane is shuffled into position and passed to __truncsfhf2,
; and the four 16-bit results are stored individually at offsets 0, 2, 4
; and 6. With F16C the whole operation is expected to be a single
; vcvtps2ph $4 that writes directly to memory.
599define void @test_trunc32_vec4(<4 x float> %a, ptr %p) #0 {
600; CHECK-LIBCALL-LABEL: test_trunc32_vec4:
601; CHECK-LIBCALL:       # %bb.0:
602; CHECK-LIBCALL-NEXT:    pushq %rbx
603; CHECK-LIBCALL-NEXT:    subq $64, %rsp
604; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
605; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
606; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
607; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
608; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
609; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
610; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
611; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
612; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
613; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
614; CHECK-LIBCALL-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
615; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
616; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
617; CHECK-LIBCALL-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
618; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
619; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
620; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
621; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
622; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
623; CHECK-LIBCALL-NEXT:    movw %ax, 6(%rbx)
624; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
625; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
626; CHECK-LIBCALL-NEXT:    movw %ax, 4(%rbx)
627; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
628; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
629; CHECK-LIBCALL-NEXT:    movw %ax, 2(%rbx)
630; CHECK-LIBCALL-NEXT:    addq $64, %rsp
631; CHECK-LIBCALL-NEXT:    popq %rbx
632; CHECK-LIBCALL-NEXT:    retq
633;
634; BWON-F16C-LABEL: test_trunc32_vec4:
635; BWON-F16C:       # %bb.0:
636; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, (%rdi)
637; BWON-F16C-NEXT:    retq
638;
639; CHECK-I686-LABEL: test_trunc32_vec4:
640; CHECK-I686:       # %bb.0:
641; CHECK-I686-NEXT:    pushl %esi
642; CHECK-I686-NEXT:    subl $88, %esp
643; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
644; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
645; CHECK-I686-NEXT:    movaps %xmm0, %xmm1
646; CHECK-I686-NEXT:    shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
647; CHECK-I686-NEXT:    movss %xmm1, (%esp)
648; CHECK-I686-NEXT:    calll __truncsfhf2
649; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
650; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
651; CHECK-I686-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
652; CHECK-I686-NEXT:    movss %xmm0, (%esp)
653; CHECK-I686-NEXT:    calll __truncsfhf2
654; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
655; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
656; CHECK-I686-NEXT:    shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
657; CHECK-I686-NEXT:    movss %xmm0, (%esp)
658; CHECK-I686-NEXT:    calll __truncsfhf2
659; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
660; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
661; CHECK-I686-NEXT:    movd %xmm0, (%esp)
662; CHECK-I686-NEXT:    calll __truncsfhf2
663; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
664; CHECK-I686-NEXT:    movw %ax, (%esi)
665; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
666; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
667; CHECK-I686-NEXT:    movw %ax, 6(%esi)
668; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
669; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
670; CHECK-I686-NEXT:    movw %ax, 4(%esi)
671; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
672; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
673; CHECK-I686-NEXT:    movw %ax, 2(%esi)
674; CHECK-I686-NEXT:    addl $88, %esp
675; CHECK-I686-NEXT:    popl %esi
676; CHECK-I686-NEXT:    retl
677  %v = fptrunc <4 x float> %a to <4 x half>
678  store <4 x half> %v, ptr %p
679  ret void
680}
681
682define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 {
683; CHECK-LIBCALL-LABEL: test_trunc64_vec4:
684; CHECK-LIBCALL:       # %bb.0:
685; CHECK-LIBCALL-NEXT:    pushq %rbx
686; CHECK-LIBCALL-NEXT:    subq $64, %rsp
687; CHECK-LIBCALL-NEXT:    movq %rdi, %rbx
688; CHECK-LIBCALL-NEXT:    movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
689; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
690; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
691; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
692; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
693; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
694; CHECK-LIBCALL-NEXT:    movhlps {{.*#+}} xmm0 = xmm0[1,1]
695; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
696; CHECK-LIBCALL-NEXT:    movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
697; CHECK-LIBCALL-NEXT:    movaps (%rsp), %xmm0 # 16-byte Reload
698; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
699; CHECK-LIBCALL-NEXT:    movaps %xmm0, (%rsp) # 16-byte Spill
700; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
701; CHECK-LIBCALL-NEXT:    callq __truncdfhf2@PLT
702; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
703; CHECK-LIBCALL-NEXT:    movw %ax, 4(%rbx)
704; CHECK-LIBCALL-NEXT:    movdqa (%rsp), %xmm0 # 16-byte Reload
705; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
706; CHECK-LIBCALL-NEXT:    movw %ax, (%rbx)
707; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
708; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
709; CHECK-LIBCALL-NEXT:    movw %ax, 6(%rbx)
710; CHECK-LIBCALL-NEXT:    movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
711; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
712; CHECK-LIBCALL-NEXT:    movw %ax, 2(%rbx)
713; CHECK-LIBCALL-NEXT:    addq $64, %rsp
714; CHECK-LIBCALL-NEXT:    popq %rbx
715; CHECK-LIBCALL-NEXT:    retq
716;
717; BWON-F16C-LABEL: test_trunc64_vec4:
718; BWON-F16C:       # %bb.0:
719; BWON-F16C-NEXT:    pushq %rbx
720; BWON-F16C-NEXT:    subq $64, %rsp
721; BWON-F16C-NEXT:    movq %rdi, %rbx
722; BWON-F16C-NEXT:    vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
723; BWON-F16C-NEXT:    vextractf128 $1, %ymm0, %xmm0
724; BWON-F16C-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
725; BWON-F16C-NEXT:    vzeroupper
726; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
727; BWON-F16C-NEXT:    vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
728; BWON-F16C-NEXT:    vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
729; BWON-F16C-NEXT:    # xmm0 = mem[1,0]
730; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
731; BWON-F16C-NEXT:    vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload
732; BWON-F16C-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
733; BWON-F16C-NEXT:    vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
734; BWON-F16C-NEXT:    vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
735; BWON-F16C-NEXT:    # kill: def $xmm0 killed $xmm0 killed $ymm0
736; BWON-F16C-NEXT:    vzeroupper
737; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
738; BWON-F16C-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
739; BWON-F16C-NEXT:    vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
740; BWON-F16C-NEXT:    # xmm0 = mem[1,0]
741; BWON-F16C-NEXT:    callq __truncdfhf2@PLT
742; BWON-F16C-NEXT:    vmovdqa (%rsp), %xmm1 # 16-byte Reload
743; BWON-F16C-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
744; BWON-F16C-NEXT:    vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload
745; BWON-F16C-NEXT:    # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
746; BWON-F16C-NEXT:    vmovq %xmm0, (%rbx)
747; BWON-F16C-NEXT:    addq $64, %rsp
748; BWON-F16C-NEXT:    popq %rbx
749; BWON-F16C-NEXT:    retq
750;
751; CHECK-I686-LABEL: test_trunc64_vec4:
752; CHECK-I686:       # %bb.0:
753; CHECK-I686-NEXT:    pushl %esi
754; CHECK-I686-NEXT:    subl $88, %esp
755; CHECK-I686-NEXT:    movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
756; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
757; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %esi
758; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
759; CHECK-I686-NEXT:    calll __truncdfhf2
760; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
761; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
762; CHECK-I686-NEXT:    movhps %xmm0, (%esp)
763; CHECK-I686-NEXT:    calll __truncdfhf2
764; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
765; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
766; CHECK-I686-NEXT:    movlps %xmm0, (%esp)
767; CHECK-I686-NEXT:    calll __truncdfhf2
768; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
769; CHECK-I686-NEXT:    movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
770; CHECK-I686-NEXT:    movhps %xmm0, (%esp)
771; CHECK-I686-NEXT:    calll __truncdfhf2
772; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
773; CHECK-I686-NEXT:    movw %ax, 6(%esi)
774; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
775; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
776; CHECK-I686-NEXT:    movw %ax, 4(%esi)
777; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
778; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
779; CHECK-I686-NEXT:    movw %ax, 2(%esi)
780; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
781; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
782; CHECK-I686-NEXT:    movw %ax, (%esi)
783; CHECK-I686-NEXT:    addl $88, %esp
784; CHECK-I686-NEXT:    popl %esi
785; CHECK-I686-NEXT:    retl
786  %v = fptrunc <4 x double> %a to <4 x half>
787  store <4 x half> %v, ptr %p
788  ret void
789}
790
791declare float @test_floatret();
792
793; On i686, if SSE2 is available, the return value from test_floatret is loaded
794; to f80 and then rounded to f32.  The DAG combiner should not combine this
795; fp_round and the subsequent fptrunc from float to half.
; Calls test_floatret and truncates its float result to half.  Expected
; lowering: a __truncsfhf2 libcall on x86-64 without F16C, vcvtps2ph with F16C,
; and on i686 an fstps of the returned value to the stack before __truncsfhf2.
796define half @test_f80trunc_nodagcombine() #0 {
797; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
798; CHECK-LIBCALL:       # %bb.0:
799; CHECK-LIBCALL-NEXT:    pushq %rax
800; CHECK-LIBCALL-NEXT:    callq test_floatret@PLT
801; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
802; CHECK-LIBCALL-NEXT:    popq %rax
803; CHECK-LIBCALL-NEXT:    retq
804;
805; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
806; BWON-F16C:       # %bb.0:
807; BWON-F16C-NEXT:    pushq %rax
808; BWON-F16C-NEXT:    callq test_floatret@PLT
809; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
810; BWON-F16C-NEXT:    vmovd %xmm0, %eax
811; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
812; BWON-F16C-NEXT:    popq %rax
813; BWON-F16C-NEXT:    retq
814;
815; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
816; CHECK-I686:       # %bb.0:
817; CHECK-I686-NEXT:    subl $12, %esp
818; CHECK-I686-NEXT:    calll test_floatret@PLT
819; CHECK-I686-NEXT:    fstps (%esp)
820; CHECK-I686-NEXT:    calll __truncsfhf2
821; CHECK-I686-NEXT:    addl $12, %esp
822; CHECK-I686-NEXT:    retl
; The call result stays a float; the fptrunc below is the only narrowing step
; written in the IR.
823  %1 = call float @test_floatret()
824  %2 = fptrunc float %1 to half
825  ret half %2
826}
827
828
829
830
; Loads a half from %b, converts the i32 %a to half, adds the two as half and
; returns the sum extended to float.  Without F16C every half<->float step in
; the expected output is a __truncsfhf2 / __extendhfsf2 call (on x86-64 the
; final extend is a tail call); with F16C the same steps use
; vcvtps2ph / vcvtph2ps inline.
831define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 {
832; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
833; CHECK-LIBCALL:       # %bb.0:
834; CHECK-LIBCALL-NEXT:    subq $40, %rsp
835; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rsi), %xmm0
836; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
837; CHECK-LIBCALL-NEXT:    xorps %xmm0, %xmm0
838; CHECK-LIBCALL-NEXT:    cvtsi2ss %edi, %xmm0
839; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
840; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
841; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
842; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
843; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
844; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
845; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
846; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
847; CHECK-LIBCALL-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
848; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
849; CHECK-LIBCALL-NEXT:    addq $40, %rsp
850; CHECK-LIBCALL-NEXT:    jmp __extendhfsf2@PLT # TAILCALL
851;
852; BWON-F16C-LABEL: test_sitofp_fadd_i32:
853; BWON-F16C:       # %bb.0:
854; BWON-F16C-NEXT:    movzwl (%rsi), %eax
855; BWON-F16C-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
856; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
857; BWON-F16C-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
858; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
859; BWON-F16C-NEXT:    vmovd %eax, %xmm1
860; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
861; BWON-F16C-NEXT:    vaddss %xmm0, %xmm1, %xmm0
862; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
863; BWON-F16C-NEXT:    vmovd %xmm0, %eax
864; BWON-F16C-NEXT:    movzwl %ax, %eax
865; BWON-F16C-NEXT:    vmovd %eax, %xmm0
866; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
867; BWON-F16C-NEXT:    retq
868;
869; CHECK-I686-LABEL: test_sitofp_fadd_i32:
870; CHECK-I686:       # %bb.0:
871; CHECK-I686-NEXT:    subl $60, %esp
872; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
873; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
874; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
875; CHECK-I686-NEXT:    xorps %xmm0, %xmm0
876; CHECK-I686-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
877; CHECK-I686-NEXT:    movss %xmm0, (%esp)
878; CHECK-I686-NEXT:    calll __truncsfhf2
879; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
880; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
881; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
882; CHECK-I686-NEXT:    movw %ax, (%esp)
883; CHECK-I686-NEXT:    calll __extendhfsf2
884; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
885; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
886; CHECK-I686-NEXT:    movw %ax, (%esp)
887; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
888; CHECK-I686-NEXT:    calll __extendhfsf2
889; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
890; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
891; CHECK-I686-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
892; CHECK-I686-NEXT:    movss %xmm0, (%esp)
893; CHECK-I686-NEXT:    calll __truncsfhf2
894; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
895; CHECK-I686-NEXT:    movw %ax, (%esp)
896; CHECK-I686-NEXT:    calll __extendhfsf2
897; CHECK-I686-NEXT:    addl $60, %esp
898; CHECK-I686-NEXT:    retl
; The addition itself is typed half; the float type only appears in the final
; fpext that produces the return value.
899  %tmp0 = load half, ptr %b
900  %tmp1 = sitofp i32 %a to half
901  %tmp2 = fadd half %tmp0, %tmp1
902  %tmp3 = fpext half %tmp2 to float
903  ret float %tmp3
904}
905
; Compares the half argument against zero with the unordered-or-not-equal
; predicate and converts the resulting i1 to half.  In every expected output
; the compare is performed on the value extended to float, and the result is
; built as an integer (0 or 0x3C00, the IEEE half encoding of 1.0) chosen by
; cmovne/cmovp and inserted into %xmm0 with (v)pinsrw.
; NOTE(review): the name presumably refers to bug report PR40273 -- confirm.
906define half @PR40273(half) #0 {
907; CHECK-LIBCALL-LABEL: PR40273:
908; CHECK-LIBCALL:       # %bb.0:
909; CHECK-LIBCALL-NEXT:    pushq %rax
910; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
911; CHECK-LIBCALL-NEXT:    xorl %eax, %eax
912; CHECK-LIBCALL-NEXT:    xorps %xmm1, %xmm1
913; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
914; CHECK-LIBCALL-NEXT:    movl $15360, %ecx # imm = 0x3C00
915; CHECK-LIBCALL-NEXT:    cmovnel %ecx, %eax
916; CHECK-LIBCALL-NEXT:    cmovpl %ecx, %eax
917; CHECK-LIBCALL-NEXT:    pinsrw $0, %eax, %xmm0
918; CHECK-LIBCALL-NEXT:    popq %rax
919; CHECK-LIBCALL-NEXT:    retq
920;
921; BWON-F16C-LABEL: PR40273:
922; BWON-F16C:       # %bb.0:
923; BWON-F16C-NEXT:    vpextrw $0, %xmm0, %eax
924; BWON-F16C-NEXT:    movzwl %ax, %eax
925; BWON-F16C-NEXT:    vmovd %eax, %xmm0
926; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
927; BWON-F16C-NEXT:    xorl %eax, %eax
928; BWON-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
929; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
930; BWON-F16C-NEXT:    movl $15360, %ecx # imm = 0x3C00
931; BWON-F16C-NEXT:    cmovnel %ecx, %eax
932; BWON-F16C-NEXT:    cmovpl %ecx, %eax
933; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
934; BWON-F16C-NEXT:    retq
935;
936; CHECK-I686-LABEL: PR40273:
937; CHECK-I686:       # %bb.0:
938; CHECK-I686-NEXT:    subl $12, %esp
939; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
940; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
941; CHECK-I686-NEXT:    movw %ax, (%esp)
942; CHECK-I686-NEXT:    calll __extendhfsf2
943; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
944; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
945; CHECK-I686-NEXT:    xorl %eax, %eax
946; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
947; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
948; CHECK-I686-NEXT:    movl $15360, %ecx # imm = 0x3C00
949; CHECK-I686-NEXT:    cmovnel %ecx, %eax
950; CHECK-I686-NEXT:    cmovpl %ecx, %eax
951; CHECK-I686-NEXT:    pinsrw $0, %eax, %xmm0
952; CHECK-I686-NEXT:    addl $12, %esp
953; CHECK-I686-NEXT:    retl
; %0 is the unnamed half argument and %1 is the implicit entry block, so the
; first instruction is numbered %2.
954  %2 = fcmp une half %0, 0xH0000
955  %3 = uitofp i1 %2 to half
956  ret half %3
957}
958
; Branches on an ordered-equal compare of the half argument with zero:
; %if.then returns and %if.end is unreachable.  The expected output compares
; the float-extended value with (v)ucomiss and ORs setp with setne to decide
; whether to jump to the %if.end label.
; This function does not use attribute group #0, and the expected output for
; the two runs that adjust the stack includes .cfi directives.
959define dso_local void @brcond(half %0) {
960; CHECK-LIBCALL-LABEL: brcond:
961; CHECK-LIBCALL:       # %bb.0: # %entry
962; CHECK-LIBCALL-NEXT:    pushq %rax
963; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 16
964; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
965; CHECK-LIBCALL-NEXT:    xorps %xmm1, %xmm1
966; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
967; CHECK-LIBCALL-NEXT:    setp %al
968; CHECK-LIBCALL-NEXT:    setne %cl
969; CHECK-LIBCALL-NEXT:    orb %al, %cl
970; CHECK-LIBCALL-NEXT:    jne .LBB18_2
971; CHECK-LIBCALL-NEXT:  # %bb.1: # %if.then
972; CHECK-LIBCALL-NEXT:    popq %rax
973; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 8
974; CHECK-LIBCALL-NEXT:    retq
975; CHECK-LIBCALL-NEXT:  .LBB18_2: # %if.end
976;
977; BWON-F16C-LABEL: brcond:
978; BWON-F16C:       # %bb.0: # %entry
979; BWON-F16C-NEXT:    vpextrw $0, %xmm0, %eax
980; BWON-F16C-NEXT:    movzwl %ax, %eax
981; BWON-F16C-NEXT:    vmovd %eax, %xmm0
982; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
983; BWON-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
984; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
985; BWON-F16C-NEXT:    setp %al
986; BWON-F16C-NEXT:    setne %cl
987; BWON-F16C-NEXT:    orb %al, %cl
988; BWON-F16C-NEXT:    jne .LBB18_2
989; BWON-F16C-NEXT:  # %bb.1: # %if.then
990; BWON-F16C-NEXT:    retq
991; BWON-F16C-NEXT:  .LBB18_2: # %if.end
992;
993; CHECK-I686-LABEL: brcond:
994; CHECK-I686:       # %bb.0: # %entry
995; CHECK-I686-NEXT:    subl $12, %esp
996; CHECK-I686-NEXT:    .cfi_def_cfa_offset 16
997; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
998; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
999; CHECK-I686-NEXT:    movw %ax, (%esp)
1000; CHECK-I686-NEXT:    calll __extendhfsf2
1001; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1002; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1003; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
1004; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
1005; CHECK-I686-NEXT:    setp %al
1006; CHECK-I686-NEXT:    setne %cl
1007; CHECK-I686-NEXT:    orb %al, %cl
1008; CHECK-I686-NEXT:    jne .LBB18_2
1009; CHECK-I686-NEXT:  # %bb.1: # %if.then
1010; CHECK-I686-NEXT:    addl $12, %esp
1011; CHECK-I686-NEXT:    .cfi_def_cfa_offset 4
1012; CHECK-I686-NEXT:    retl
1013; CHECK-I686-NEXT:  .LBB18_2: # %if.end
1014entry:
; The zero constant is the first operand of the compare, the argument the
; second.
1015  %cmp = fcmp oeq half 0xH0000, %0
1016  br i1 %cmp, label %if.then, label %if.end
1017
1018if.then:                                          ; preds = %entry
1019  ret void
1020
1021if.end:                                           ; preds = %entry
1022  unreachable
1023}
1024
; Takes the square root of a half through the llvm.sqrt.f16 intrinsic.  The
; expected output extends the argument to float, applies sqrtss (vsqrtss with
; F16C) and truncates the result back to half; without F16C the extend and
; truncate are __extendhfsf2 / __truncsfhf2 calls.
1025define half @test_sqrt(half %0) {
1026; CHECK-LIBCALL-LABEL: test_sqrt:
1027; CHECK-LIBCALL:       # %bb.0: # %entry
1028; CHECK-LIBCALL-NEXT:    pushq %rax
1029; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 16
1030; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1031; CHECK-LIBCALL-NEXT:    sqrtss %xmm0, %xmm0
1032; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1033; CHECK-LIBCALL-NEXT:    popq %rax
1034; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 8
1035; CHECK-LIBCALL-NEXT:    retq
1036;
1037; BWON-F16C-LABEL: test_sqrt:
1038; BWON-F16C:       # %bb.0: # %entry
1039; BWON-F16C-NEXT:    vpextrw $0, %xmm0, %eax
1040; BWON-F16C-NEXT:    movzwl %ax, %eax
1041; BWON-F16C-NEXT:    vmovd %eax, %xmm0
1042; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
1043; BWON-F16C-NEXT:    vsqrtss %xmm0, %xmm0, %xmm0
1044; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
1045; BWON-F16C-NEXT:    vmovd %xmm0, %eax
1046; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
1047; BWON-F16C-NEXT:    retq
1048;
1049; CHECK-I686-LABEL: test_sqrt:
1050; CHECK-I686:       # %bb.0: # %entry
1051; CHECK-I686-NEXT:    subl $12, %esp
1052; CHECK-I686-NEXT:    .cfi_def_cfa_offset 16
1053; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
1054; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1055; CHECK-I686-NEXT:    movw %ax, (%esp)
1056; CHECK-I686-NEXT:    calll __extendhfsf2
1057; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1058; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1059; CHECK-I686-NEXT:    sqrtss %xmm0, %xmm0
1060; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1061; CHECK-I686-NEXT:    calll __truncsfhf2
1062; CHECK-I686-NEXT:    addl $12, %esp
1063; CHECK-I686-NEXT:    .cfi_def_cfa_offset 4
1064; CHECK-I686-NEXT:    retl
1065entry:
; The intrinsic is invoked with half operand and result types directly.
1066  %1 = call half @llvm.sqrt.f16(half %0)
1067  ret half %1
1068}
1069
1070declare half @llvm.sqrt.f16(half)
1071
; Selects 0.0 when fabs(undef) compares ordered-less-or-equal to 0xH4800 and
; %multiply.57 otherwise, truncates the selected float to half and stores it
; through an undef pointer.
; NOTE(review): looks like a reduced reproducer (undef operands, values with
; no users) -- confirm against the originating report.
1072define void @main.158() local_unnamed_addr #0 {
1073; CHECK-LIBCALL-LABEL: main.158:
1074; CHECK-LIBCALL:       # %bb.0: # %entry
1075; CHECK-LIBCALL-NEXT:    pushq %rax
1076; CHECK-LIBCALL-NEXT:    xorps %xmm0, %xmm0
1077; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1078; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1079; CHECK-LIBCALL-NEXT:    movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1080; CHECK-LIBCALL-NEXT:    ucomiss %xmm0, %xmm1
1081; CHECK-LIBCALL-NEXT:    xorps %xmm0, %xmm0
1082; CHECK-LIBCALL-NEXT:    jae .LBB20_2
1083; CHECK-LIBCALL-NEXT:  # %bb.1: # %entry
1084; CHECK-LIBCALL-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1085; CHECK-LIBCALL-NEXT:  .LBB20_2: # %entry
1086; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
1087; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
1088; CHECK-LIBCALL-NEXT:    movw %ax, (%rax)
1089; CHECK-LIBCALL-NEXT:    popq %rax
1090; CHECK-LIBCALL-NEXT:    retq
1091;
1092; BWON-F16C-LABEL: main.158:
1093; BWON-F16C:       # %bb.0: # %entry
1094; BWON-F16C-NEXT:    vxorps %xmm0, %xmm0, %xmm0
1095; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
1096; BWON-F16C-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
1097; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
1098; BWON-F16C-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
1099; BWON-F16C-NEXT:    vucomiss %xmm0, %xmm1
1100; BWON-F16C-NEXT:    vxorps %xmm0, %xmm0, %xmm0
1101; BWON-F16C-NEXT:    jae .LBB20_2
1102; BWON-F16C-NEXT:  # %bb.1: # %entry
1103; BWON-F16C-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1104; BWON-F16C-NEXT:  .LBB20_2: # %entry
1105; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
1106; BWON-F16C-NEXT:    vmovd %xmm0, %eax
1107; BWON-F16C-NEXT:    movw %ax, (%rax)
1108; BWON-F16C-NEXT:    retq
1109;
1110; CHECK-I686-LABEL: main.158:
1111; CHECK-I686:       # %bb.0: # %entry
1112; CHECK-I686-NEXT:    subl $12, %esp
1113; CHECK-I686-NEXT:    pxor %xmm0, %xmm0
1114; CHECK-I686-NEXT:    movd %xmm0, (%esp)
1115; CHECK-I686-NEXT:    calll __truncsfhf2
1116; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1117; CHECK-I686-NEXT:    movw %ax, (%esp)
1118; CHECK-I686-NEXT:    calll __extendhfsf2
1119; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1120; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1121; CHECK-I686-NEXT:    ucomiss {{[0-9]+}}(%esp), %xmm0
1122; CHECK-I686-NEXT:    xorps %xmm0, %xmm0
1123; CHECK-I686-NEXT:    jae .LBB20_2
1124; CHECK-I686-NEXT:  # %bb.1: # %entry
1125; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1126; CHECK-I686-NEXT:  .LBB20_2: # %entry
1127; CHECK-I686-NEXT:    movss %xmm0, (%esp)
1128; CHECK-I686-NEXT:    calll __truncsfhf2
1129; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1130; CHECK-I686-NEXT:    movw %ax, (%eax)
1131; CHECK-I686-NEXT:    addl $12, %esp
1132; CHECK-I686-NEXT:    retl
1133entry:
; The chain %1 -> %multiply.95 -> %add.82 -> %multiply.68 -> %subtract.65 has
; no user that reaches the store; only %compare.2 and %multiply.57 feed the
; select.
1134  %0 = tail call half @llvm.fabs.f16(half undef)
1135  %1 = fpext half %0 to float
1136  %compare.2 = fcmp ole half %0, 0xH4800
1137  %multiply.95 = fmul float %1, 5.000000e-01
1138  %add.82 = fadd float %multiply.95, -2.000000e+00
1139  %multiply.68 = fmul float %add.82, 0.000000e+00
1140  %subtract.65 = fsub float %multiply.68, 0.000000e+00
1141  %multiply.57 = fmul float undef, 0.000000e+00
1142  %2 = select i1 %compare.2, float 0.000000e+00, float %multiply.57
1143  %3 = fptrunc float %2 to half
1144  store half %3, ptr undef, align 2
1145  ret void
1146}
1147
; Loads a half through an undef pointer, splats it to four lanes both as half
; and as its i16 bit pattern, and stores a <4 x half> in which every lane that
; compares unordered with zero is replaced by 0xH7E00; the other lanes come
; from %6, which is derived from the splatted bits through selects on undef
; conditions.
; NOTE(review): looks like a reduced reproducer (undef pointers and select
; conditions) -- confirm against the originating report.
1148define void @main.45() local_unnamed_addr {
1149; CHECK-LIBCALL-LABEL: main.45:
1150; CHECK-LIBCALL:       # %bb.0: # %entry
1151; CHECK-LIBCALL-NEXT:    pushq %rbp
1152; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 16
1153; CHECK-LIBCALL-NEXT:    pushq %r15
1154; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 24
1155; CHECK-LIBCALL-NEXT:    pushq %r14
1156; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 32
1157; CHECK-LIBCALL-NEXT:    pushq %rbx
1158; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 40
1159; CHECK-LIBCALL-NEXT:    pushq %rax
1160; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 48
1161; CHECK-LIBCALL-NEXT:    .cfi_offset %rbx, -40
1162; CHECK-LIBCALL-NEXT:    .cfi_offset %r14, -32
1163; CHECK-LIBCALL-NEXT:    .cfi_offset %r15, -24
1164; CHECK-LIBCALL-NEXT:    .cfi_offset %rbp, -16
1165; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rax), %xmm0
1166; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %eax
1167; CHECK-LIBCALL-NEXT:    movd %eax, %xmm1
1168; CHECK-LIBCALL-NEXT:    pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
1169; CHECK-LIBCALL-NEXT:    movq %xmm1, %rbx
1170; CHECK-LIBCALL-NEXT:    movq %rbx, %r14
1171; CHECK-LIBCALL-NEXT:    shrq $48, %r14
1172; CHECK-LIBCALL-NEXT:    movq %rbx, %r15
1173; CHECK-LIBCALL-NEXT:    shrq $32, %r15
1174; CHECK-LIBCALL-NEXT:    movl %ebx, %ebp
1175; CHECK-LIBCALL-NEXT:    shrl $16, %ebp
1176; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
1177; CHECK-LIBCALL-NEXT:    ucomiss %xmm0, %xmm0
1178; CHECK-LIBCALL-NEXT:    movl $32256, %eax # imm = 0x7E00
1179; CHECK-LIBCALL-NEXT:    cmovpl %eax, %ebp
1180; CHECK-LIBCALL-NEXT:    cmovpl %eax, %r15d
1181; CHECK-LIBCALL-NEXT:    cmovpl %eax, %r14d
1182; CHECK-LIBCALL-NEXT:    cmovpl %eax, %ebx
1183; CHECK-LIBCALL-NEXT:    movw %bx, (%rax)
1184; CHECK-LIBCALL-NEXT:    movw %r14w, (%rax)
1185; CHECK-LIBCALL-NEXT:    movw %r15w, (%rax)
1186; CHECK-LIBCALL-NEXT:    movw %bp, (%rax)
1187; CHECK-LIBCALL-NEXT:    addq $8, %rsp
1188; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 40
1189; CHECK-LIBCALL-NEXT:    popq %rbx
1190; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 32
1191; CHECK-LIBCALL-NEXT:    popq %r14
1192; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 24
1193; CHECK-LIBCALL-NEXT:    popq %r15
1194; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 16
1195; CHECK-LIBCALL-NEXT:    popq %rbp
1196; CHECK-LIBCALL-NEXT:    .cfi_def_cfa_offset 8
1197; CHECK-LIBCALL-NEXT:    retq
1198;
1199; BWON-F16C-LABEL: main.45:
1200; BWON-F16C:       # %bb.0: # %entry
1201; BWON-F16C-NEXT:    movzwl (%rax), %eax
1202; BWON-F16C-NEXT:    vmovd %eax, %xmm0
1203; BWON-F16C-NEXT:    vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
1204; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
1205; BWON-F16C-NEXT:    xorl %eax, %eax
1206; BWON-F16C-NEXT:    vucomiss %xmm0, %xmm0
1207; BWON-F16C-NEXT:    movl $65535, %ecx # imm = 0xFFFF
1208; BWON-F16C-NEXT:    cmovnpl %eax, %ecx
1209; BWON-F16C-NEXT:    vmovd %ecx, %xmm0
1210; BWON-F16C-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1211; BWON-F16C-NEXT:    vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
1212; BWON-F16C-NEXT:    vmovq %xmm0, (%rax)
1213; BWON-F16C-NEXT:    retq
1214;
1215; CHECK-I686-LABEL: main.45:
1216; CHECK-I686:       # %bb.0: # %entry
1217; CHECK-I686-NEXT:    pushl %edi
1218; CHECK-I686-NEXT:    .cfi_def_cfa_offset 8
1219; CHECK-I686-NEXT:    pushl %esi
1220; CHECK-I686-NEXT:    .cfi_def_cfa_offset 12
1221; CHECK-I686-NEXT:    subl $20, %esp
1222; CHECK-I686-NEXT:    .cfi_def_cfa_offset 32
1223; CHECK-I686-NEXT:    .cfi_offset %esi, -12
1224; CHECK-I686-NEXT:    .cfi_offset %edi, -8
1225; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
1226; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
1227; CHECK-I686-NEXT:    movd %eax, %xmm0
1228; CHECK-I686-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
1229; CHECK-I686-NEXT:    movd %xmm0, %esi
1230; CHECK-I686-NEXT:    movl %esi, %edi
1231; CHECK-I686-NEXT:    shrl $16, %edi
1232; CHECK-I686-NEXT:    movw %ax, (%esp)
1233; CHECK-I686-NEXT:    calll __extendhfsf2
1234; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
1235; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
1236; CHECK-I686-NEXT:    ucomiss %xmm0, %xmm0
1237; CHECK-I686-NEXT:    movl $32256, %eax # imm = 0x7E00
1238; CHECK-I686-NEXT:    cmovpl %eax, %esi
1239; CHECK-I686-NEXT:    cmovpl %eax, %edi
1240; CHECK-I686-NEXT:    movw %di, (%eax)
1241; CHECK-I686-NEXT:    movw %si, (%eax)
1242; CHECK-I686-NEXT:    addl $20, %esp
1243; CHECK-I686-NEXT:    .cfi_def_cfa_offset 12
1244; CHECK-I686-NEXT:    popl %esi
1245; CHECK-I686-NEXT:    .cfi_def_cfa_offset 8
1246; CHECK-I686-NEXT:    popl %edi
1247; CHECK-I686-NEXT:    .cfi_def_cfa_offset 4
1248; CHECK-I686-NEXT:    retl
1249entry:
; %0 is used twice: as a half for the unordered compare (%2) and, via the
; bitcast %1, as an i16 for the integer splat that becomes the non-NaN lanes.
1250  %0 = load half, ptr undef, align 8
1251  %1 = bitcast half %0 to i16
1252  %broadcast.splatinsert = insertelement <4 x half> poison, half %0, i64 0
1253  %broadcast.splat = shufflevector <4 x half> %broadcast.splatinsert, <4 x half> poison, <4 x i32> zeroinitializer
1254  %broadcast.splatinsert13 = insertelement <4 x i16> poison, i16 %1, i64 0
1255  %broadcast.splat14 = shufflevector <4 x i16> %broadcast.splatinsert13, <4 x i16> poison, <4 x i32> zeroinitializer
1256  %2 = fcmp uno <4 x half> %broadcast.splat, zeroinitializer
1257  %3 = add <4 x i16> zeroinitializer, %broadcast.splat14
1258  %4 = select i1 undef, <4 x i16> undef, <4 x i16> %3
1259  %5 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> %4
1260  %6 = bitcast <4 x i16> %5 to <4 x half>
1261  %7 = select <4 x i1> %2, <4 x half> <half 0xH7E00, half 0xH7E00, half 0xH7E00, half 0xH7E00>, <4 x half> %6
1262  store <4 x half> %7, ptr undef, align 16
1263  ret void
1264}
1265
; Returns %x with the sign of %y via the llvm.copysign.f16 intrinsic.  The
; expected output contains no conversion or libcall: it masks %y with 0x8000,
; masks %x with 0x7FFF, ORs the two in a general-purpose register and inserts
; the result into %xmm0.
1266define half @fcopysign(half %x, half %y) {
1267; CHECK-LIBCALL-LABEL: fcopysign:
1268; CHECK-LIBCALL:       # %bb.0:
1269; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm1, %eax
1270; CHECK-LIBCALL-NEXT:    andl $-32768, %eax # imm = 0x8000
1271; CHECK-LIBCALL-NEXT:    pextrw $0, %xmm0, %ecx
1272; CHECK-LIBCALL-NEXT:    andl $32767, %ecx # imm = 0x7FFF
1273; CHECK-LIBCALL-NEXT:    orl %eax, %ecx
1274; CHECK-LIBCALL-NEXT:    pinsrw $0, %ecx, %xmm0
1275; CHECK-LIBCALL-NEXT:    retq
1276;
1277; BWON-F16C-LABEL: fcopysign:
1278; BWON-F16C:       # %bb.0:
1279; BWON-F16C-NEXT:    vpextrw $0, %xmm1, %eax
1280; BWON-F16C-NEXT:    andl $-32768, %eax # imm = 0x8000
1281; BWON-F16C-NEXT:    vpextrw $0, %xmm0, %ecx
1282; BWON-F16C-NEXT:    andl $32767, %ecx # imm = 0x7FFF
1283; BWON-F16C-NEXT:    orl %eax, %ecx
1284; BWON-F16C-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0
1285; BWON-F16C-NEXT:    retq
1286;
1287; CHECK-I686-LABEL: fcopysign:
1288; CHECK-I686:       # %bb.0:
1289; CHECK-I686-NEXT:    movl $-32768, %eax # imm = 0x8000
1290; CHECK-I686-NEXT:    andl {{[0-9]+}}(%esp), %eax
1291; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %ecx
1292; CHECK-I686-NEXT:    andl $32767, %ecx # imm = 0x7FFF
1293; CHECK-I686-NEXT:    orl %eax, %ecx
1294; CHECK-I686-NEXT:    pinsrw $0, %ecx, %xmm0
1295; CHECK-I686-NEXT:    retl
; First operand supplies the magnitude, second operand the sign.
1296  %a = call half @llvm.copysign.f16(half %x, half %y)
1297  ret half %a
1298}
1299
1300declare half @llvm.fabs.f16(half)
1301declare half @llvm.copysign.f16(half, half)
1302
; Chooses between two <8 x half> vectors with a scalar i1 condition.  The
; expected output tests bit 0 of the condition and, when it is clear, copies
; %xmm1 over %xmm0 with a whole-register move; no per-element half handling
; appears in any run.
1303define <8 x half> @select(i1 %c, <8 x half> %x, <8 x half> %y) {
1304; CHECK-LIBCALL-LABEL: select:
1305; CHECK-LIBCALL:       # %bb.0:
1306; CHECK-LIBCALL-NEXT:    testb $1, %dil
1307; CHECK-LIBCALL-NEXT:    jne .LBB23_2
1308; CHECK-LIBCALL-NEXT:  # %bb.1:
1309; CHECK-LIBCALL-NEXT:    movaps %xmm1, %xmm0
1310; CHECK-LIBCALL-NEXT:  .LBB23_2:
1311; CHECK-LIBCALL-NEXT:    retq
1312;
1313; BWON-F16C-LABEL: select:
1314; BWON-F16C:       # %bb.0:
1315; BWON-F16C-NEXT:    testb $1, %dil
1316; BWON-F16C-NEXT:    jne .LBB23_2
1317; BWON-F16C-NEXT:  # %bb.1:
1318; BWON-F16C-NEXT:    vmovaps %xmm1, %xmm0
1319; BWON-F16C-NEXT:  .LBB23_2:
1320; BWON-F16C-NEXT:    retq
1321;
1322; CHECK-I686-LABEL: select:
1323; CHECK-I686:       # %bb.0:
1324; CHECK-I686-NEXT:    testb $1, {{[0-9]+}}(%esp)
1325; CHECK-I686-NEXT:    jne .LBB23_2
1326; CHECK-I686-NEXT:  # %bb.1:
1327; CHECK-I686-NEXT:    movaps %xmm1, %xmm0
1328; CHECK-I686-NEXT:  .LBB23_2:
1329; CHECK-I686-NEXT:    retl
; The condition is a single i1, not a vector mask, so one of the two whole
; vectors is returned.
1330  %s = select i1 %c, <8 x half> %x, <8 x half> %y
1331  ret <8 x half> %s
1332}
1333
; Loads an <8 x half> from %p (align 8) and returns a shuffle whose lanes 0
; and 1 are both source element 4; the remaining six lanes are undef.  The
; expected output is a (v)pshufhw followed by a (v)pshufd on the loaded
; vector, with the load folded into the first shuffle in the F16C run.
1334define <8 x half> @shuffle(ptr %p) {
1335; CHECK-LIBCALL-LABEL: shuffle:
1336; CHECK-LIBCALL:       # %bb.0:
1337; CHECK-LIBCALL-NEXT:    movdqu (%rdi), %xmm0
1338; CHECK-LIBCALL-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1339; CHECK-LIBCALL-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1340; CHECK-LIBCALL-NEXT:    retq
1341;
1342; BWON-F16C-LABEL: shuffle:
1343; BWON-F16C:       # %bb.0:
1344; BWON-F16C-NEXT:    vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,4,4,4,4]
1345; BWON-F16C-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1346; BWON-F16C-NEXT:    retq
1347;
1348; CHECK-I686-LABEL: shuffle:
1349; CHECK-I686:       # %bb.0:
1350; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
1351; CHECK-I686-NEXT:    movdqu (%eax), %xmm0
1352; CHECK-I686-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
1353; CHECK-I686-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
1354; CHECK-I686-NEXT:    retl
; Only mask entries 0 and 1 are defined (both index 4); the second shuffle
; operand is poison and is never selected.
1355  %1 = load <8 x half>, ptr %p, align 8
1356  %2 = shufflevector <8 x half> %1, <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
1357  ret <8 x half> %2
1358}
1359
1360attributes #0 = { nounwind }
1361