; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s --check-prefix=X32-NOF16C
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=f16c | FileCheck %s --check-prefix=X32-F16C
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefix=X64-NOF16C
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=f16c | FileCheck %s --check-prefix=X64-F16C
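; Check lowering of constrained (strict) FP conversions and arithmetic on half,
; with and without the F16C feature, on 32-bit and 64-bit Darwin.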

@a = global half 0xH0000, align 2
@b = global half 0xH0000, align 2
@c = global half 0xH0000, align 2

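; Strict fpext half -> float: libcall to __extendhfsf2 without F16C, vcvtph2ps with F16C.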
define float @half_to_float() strictfp {
; X32-NOF16C-LABEL: half_to_float:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: half_to_float:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    pushl %eax
; X32-F16C-NEXT:    .cfi_def_cfa_offset 8
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    vmovd %eax, %xmm0
; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X32-F16C-NEXT:    vmovss %xmm0, (%esp)
; X32-F16C-NEXT:    flds (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    popl %eax
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: half_to_float:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    pinsrw $0, _a(%rip), %xmm0
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: half_to_float:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl _a(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    retq
  %1 = load half, ptr @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %1, metadata !"fpexcept.strict") #0
  ret float %2
}

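; Strict fpext half -> double goes through float (__extendhfsf2 or vcvtph2ps), then float -> double.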
define double @half_to_double() strictfp {
; X32-NOF16C-LABEL: half_to_double:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: half_to_double:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $12, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    vmovd %eax, %xmm0
; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X32-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; X32-F16C-NEXT:    vmovsd %xmm0, (%esp)
; X32-F16C-NEXT:    fldl (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    addl $12, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: half_to_double:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    pinsrw $0, _a(%rip), %xmm0
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    cvtss2sd %xmm0, %xmm0
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: half_to_double:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl _a(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; X64-F16C-NEXT:    retq
  %1 = load half, ptr @a, align 2
  %2 = tail call double @llvm.experimental.constrained.fpext.f64.f16(half %1, metadata !"fpexcept.strict") #0
  ret double %2
}

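; Strict fpext half -> x86_fp80 is lowered to a libcall even when F16C is available.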
define x86_fp80 @half_to_fp80() strictfp {
; X32-NOF16C-LABEL: half_to_fp80:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: half_to_fp80:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $12, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
; X32-F16C-NEXT:    vpinsrw $0, _a, %xmm0, %xmm0
; X32-F16C-NEXT:    vpextrw $0, %xmm0, (%esp)
; X32-F16C-NEXT:    calll ___extendhfxf2
; X32-F16C-NEXT:    addl $12, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: half_to_fp80:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    pinsrw $0, _a(%rip), %xmm0
; X64-NOF16C-NEXT:    callq ___extendhfxf2
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: half_to_fp80:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    pushq %rax
; X64-F16C-NEXT:    .cfi_def_cfa_offset 16
; X64-F16C-NEXT:    vpinsrw $0, _a(%rip), %xmm0, %xmm0
; X64-F16C-NEXT:    callq ___extendhfxf2
; X64-F16C-NEXT:    popq %rax
; X64-F16C-NEXT:    retq
  %1 = load half, ptr @a, align 2
  %2 = tail call x86_fp80 @llvm.experimental.constrained.fpext.f80.f16(half %1, metadata !"fpexcept.strict") #0
  ret x86_fp80 %2
}

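; Strict fptrunc float -> half: libcall to __truncsfhf2 without F16C, vcvtps2ph with F16C.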
define void @float_to_half(float %0) strictfp {
; X32-NOF16C-LABEL: float_to_half:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    flds {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT:    fstps (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncsfhf2
; X32-NOF16C-NEXT:    movw %ax, _a
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: float_to_half:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X32-F16C-NEXT:    vmovd %xmm0, %eax
; X32-F16C-NEXT:    movw %ax, _a
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: float_to_half:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    callq ___truncsfhf2
; X64-NOF16C-NEXT:    pextrw $0, %xmm0, %eax
; X64-NOF16C-NEXT:    movw %ax, _a(%rip)
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: float_to_half:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X64-F16C-NEXT:    vmovd %xmm0, %eax
; X64-F16C-NEXT:    movw %ax, _a(%rip)
; X64-F16C-NEXT:    retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, ptr @a, align 2
  ret void
}

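; Strict fptrunc double -> half: libcall to __truncdfhf2 on all configurations.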
define void @double_to_half(double %0) strictfp {
; X32-NOF16C-LABEL: double_to_half:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    fldl {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT:    fstpl (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncdfhf2
; X32-NOF16C-NEXT:    movw %ax, _a
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: double_to_half:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $12, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
; X32-F16C-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-F16C-NEXT:    vmovq %xmm0, (%esp)
; X32-F16C-NEXT:    calll ___truncdfhf2
; X32-F16C-NEXT:    vpextrw $0, %xmm0, _a
; X32-F16C-NEXT:    addl $12, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: double_to_half:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    callq ___truncdfhf2
; X64-NOF16C-NEXT:    pextrw $0, %xmm0, %eax
; X64-NOF16C-NEXT:    movw %ax, _a(%rip)
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: double_to_half:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    pushq %rax
; X64-F16C-NEXT:    .cfi_def_cfa_offset 16
; X64-F16C-NEXT:    callq ___truncdfhf2
; X64-F16C-NEXT:    vpextrw $0, %xmm0, _a(%rip)
; X64-F16C-NEXT:    popq %rax
; X64-F16C-NEXT:    retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f64(double %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, ptr @a, align 2
  ret void
}

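; Strict fptrunc x86_fp80 -> half: libcall to __truncxfhf2 on all configurations.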
define void @fp80_to_half(x86_fp80 %0) strictfp {
; X32-NOF16C-LABEL: fp80_to_half:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $28, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 32
; X32-NOF16C-NEXT:    fldt {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT:    fstpt (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncxfhf2
; X32-NOF16C-NEXT:    movw %ax, _a
; X32-NOF16C-NEXT:    addl $28, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: fp80_to_half:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $28, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 32
; X32-F16C-NEXT:    fldt {{[0-9]+}}(%esp)
; X32-F16C-NEXT:    fstpt (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    calll ___truncxfhf2
; X32-F16C-NEXT:    vpextrw $0, %xmm0, _a
; X32-F16C-NEXT:    addl $28, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: fp80_to_half:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    subq $24, %rsp
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 32
; X64-NOF16C-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-NOF16C-NEXT:    fstpt (%rsp)
; X64-NOF16C-NEXT:    wait
; X64-NOF16C-NEXT:    callq ___truncxfhf2
; X64-NOF16C-NEXT:    pextrw $0, %xmm0, %eax
; X64-NOF16C-NEXT:    movw %ax, _a(%rip)
; X64-NOF16C-NEXT:    addq $24, %rsp
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: fp80_to_half:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    subq $24, %rsp
; X64-F16C-NEXT:    .cfi_def_cfa_offset 32
; X64-F16C-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-F16C-NEXT:    fstpt (%rsp)
; X64-F16C-NEXT:    wait
; X64-F16C-NEXT:    callq ___truncxfhf2
; X64-F16C-NEXT:    vpextrw $0, %xmm0, _a(%rip)
; X64-F16C-NEXT:    addq $24, %rsp
; X64-F16C-NEXT:    retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f80(x86_fp80 %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, ptr @a, align 2
  ret void
}

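; Strict half addition: both operands are extended to float, added as float, then truncated back to half.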
define void @add() strictfp {
; X32-NOF16C-LABEL: add:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    movzwl _b, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
; X32-NOF16C-NEXT:    faddp %st, %st(1)
; X32-NOF16C-NEXT:    fstps (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncsfhf2
; X32-NOF16C-NEXT:    movw %ax, _c
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: add:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    vmovd %eax, %xmm0
; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X32-F16C-NEXT:    movzwl _b, %eax
; X32-F16C-NEXT:    vmovd %eax, %xmm1
; X32-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; X32-F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; X32-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X32-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X32-F16C-NEXT:    vmovd %xmm0, %eax
; X32-F16C-NEXT:    movw %ax, _c
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: add:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    pinsrw $0, _a(%rip), %xmm0
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
; X64-NOF16C-NEXT:    pinsrw $0, _b(%rip), %xmm0
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Folded Reload
; X64-NOF16C-NEXT:    callq ___truncsfhf2
; X64-NOF16C-NEXT:    pextrw $0, %xmm0, %eax
; X64-NOF16C-NEXT:    movw %ax, _c(%rip)
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: add:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl _a(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    movzwl _b(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm1
; X64-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; X64-F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; X64-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X64-F16C-NEXT:    vmovd %xmm0, %eax
; X64-F16C-NEXT:    movw %ax, _c(%rip)
; X64-F16C-NEXT:    retq
  %1 = load half, ptr @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %1, metadata !"fpexcept.strict") #0
  %3 = load half, ptr @b, align 2
  %4 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %3, metadata !"fpexcept.strict") #0
  %5 = tail call float @llvm.experimental.constrained.fadd.f32(float %2, float %4, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %6 = tail call half @llvm.experimental.constrained.fptrunc.f16.f32(float %5, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %6, ptr @c, align 2
  ret void
}

declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
declare x86_fp80 @llvm.experimental.constrained.fpext.f80.f16(half, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f80(x86_fp80, metadata, metadata)

attributes #0 = { strictfp }