; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin | FileCheck %s --check-prefix=X32-NOF16C
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=f16c | FileCheck %s --check-prefix=X32-F16C
; RUN: llc < %s -mtriple=x86_64-apple-darwin | FileCheck %s --check-prefix=X64-NOF16C
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=f16c | FileCheck %s --check-prefix=X64-F16C

@a = global half 0xH0000, align 2
@b = global half 0xH0000, align 2
@c = global half 0xH0000, align 2

define float @half_to_float() strictfp {
; X32-NOF16C-LABEL: half_to_float:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: half_to_float:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    pushl %eax
; X32-F16C-NEXT:    .cfi_def_cfa_offset 8
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    vmovd %eax, %xmm0
; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X32-F16C-NEXT:    vmovss %xmm0, (%esp)
; X32-F16C-NEXT:    flds (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    popl %eax
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: half_to_float:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    pinsrw $0, _a(%rip), %xmm0
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: half_to_float:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl _a(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    retq
  %1 = load half, ptr @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %1, metadata !"fpexcept.strict") #0
  ret float %2
}

define double @half_to_double() strictfp {
; X32-NOF16C-LABEL: half_to_double:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: half_to_double:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $12, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    vmovd %eax, %xmm0
; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X32-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; X32-F16C-NEXT:    vmovsd %xmm0, (%esp)
; X32-F16C-NEXT:    fldl (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    addl $12, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: half_to_double:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    pinsrw $0, _a(%rip), %xmm0
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    cvtss2sd %xmm0, %xmm0
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: half_to_double:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl _a(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    vcvtss2sd %xmm0, %xmm0, %xmm0
; X64-F16C-NEXT:    retq
  %1 = load half, ptr @a, align 2
  %2 = tail call double @llvm.experimental.constrained.fpext.f64.f16(half %1, metadata !"fpexcept.strict") #0
  ret double %2
}

define x86_fp80 @half_to_fp80() strictfp {
; X32-NOF16C-LABEL: half_to_fp80:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: half_to_fp80:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $12, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
; X32-F16C-NEXT:    vpinsrw $0, _a, %xmm0, %xmm0
; X32-F16C-NEXT:    vpextrw $0, %xmm0, (%esp)
; X32-F16C-NEXT:    calll ___extendhfxf2
; X32-F16C-NEXT:    addl $12, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: half_to_fp80:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    pinsrw $0, _a(%rip), %xmm0
; X64-NOF16C-NEXT:    callq ___extendhfxf2
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: half_to_fp80:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    pushq %rax
; X64-F16C-NEXT:    .cfi_def_cfa_offset 16
; X64-F16C-NEXT:    vpinsrw $0, _a(%rip), %xmm0, %xmm0
; X64-F16C-NEXT:    callq ___extendhfxf2
; X64-F16C-NEXT:    popq %rax
; X64-F16C-NEXT:    retq
  %1 = load half, ptr @a, align 2
  %2 = tail call x86_fp80 @llvm.experimental.constrained.fpext.f80.f16(half %1, metadata !"fpexcept.strict") #0
  ret x86_fp80 %2
}

define void @float_to_half(float %0) strictfp {
; X32-NOF16C-LABEL: float_to_half:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    flds {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT:    fstps (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncsfhf2
; X32-NOF16C-NEXT:    movw %ax, _a
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: float_to_half:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X32-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X32-F16C-NEXT:    vmovd %xmm0, %eax
; X32-F16C-NEXT:    movw %ax, _a
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: float_to_half:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    callq ___truncsfhf2
; X64-NOF16C-NEXT:    pextrw $0, %xmm0, %eax
; X64-NOF16C-NEXT:    movw %ax, _a(%rip)
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: float_to_half:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X64-F16C-NEXT:    vmovd %xmm0, %eax
; X64-F16C-NEXT:    movw %ax, _a(%rip)
; X64-F16C-NEXT:    retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f32(float %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, ptr @a, align 2
  ret void
}

define void @double_to_half(double %0) strictfp {
; X32-NOF16C-LABEL: double_to_half:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    fldl {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT:    fstpl (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncdfhf2
; X32-NOF16C-NEXT:    movw %ax, _a
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: double_to_half:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $12, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 16
; X32-F16C-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
; X32-F16C-NEXT:    vmovq %xmm0, (%esp)
; X32-F16C-NEXT:    calll ___truncdfhf2
; X32-F16C-NEXT:    vpextrw $0, %xmm0, _a
; X32-F16C-NEXT:    addl $12, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: double_to_half:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    callq ___truncdfhf2
; X64-NOF16C-NEXT:    pextrw $0, %xmm0, %eax
; X64-NOF16C-NEXT:    movw %ax, _a(%rip)
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: double_to_half:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    pushq %rax
; X64-F16C-NEXT:    .cfi_def_cfa_offset 16
; X64-F16C-NEXT:    callq ___truncdfhf2
; X64-F16C-NEXT:    vpextrw $0, %xmm0, _a(%rip)
; X64-F16C-NEXT:    popq %rax
; X64-F16C-NEXT:    retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f64(double %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, ptr @a, align 2
  ret void
}

define void @fp80_to_half(x86_fp80 %0) strictfp {
; X32-NOF16C-LABEL: fp80_to_half:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $28, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 32
; X32-NOF16C-NEXT:    fldt {{[0-9]+}}(%esp)
; X32-NOF16C-NEXT:    fstpt (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncxfhf2
; X32-NOF16C-NEXT:    movw %ax, _a
; X32-NOF16C-NEXT:    addl $28, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: fp80_to_half:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    subl $28, %esp
; X32-F16C-NEXT:    .cfi_def_cfa_offset 32
; X32-F16C-NEXT:    fldt {{[0-9]+}}(%esp)
; X32-F16C-NEXT:    fstpt (%esp)
; X32-F16C-NEXT:    wait
; X32-F16C-NEXT:    calll ___truncxfhf2
; X32-F16C-NEXT:    vpextrw $0, %xmm0, _a
; X32-F16C-NEXT:    addl $28, %esp
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: fp80_to_half:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    subq $24, %rsp
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 32
; X64-NOF16C-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-NOF16C-NEXT:    fstpt (%rsp)
; X64-NOF16C-NEXT:    wait
; X64-NOF16C-NEXT:    callq ___truncxfhf2
; X64-NOF16C-NEXT:    pextrw $0, %xmm0, %eax
; X64-NOF16C-NEXT:    movw %ax, _a(%rip)
; X64-NOF16C-NEXT:    addq $24, %rsp
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: fp80_to_half:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    subq $24, %rsp
; X64-F16C-NEXT:    .cfi_def_cfa_offset 32
; X64-F16C-NEXT:    fldt {{[0-9]+}}(%rsp)
; X64-F16C-NEXT:    fstpt (%rsp)
; X64-F16C-NEXT:    wait
; X64-F16C-NEXT:    callq ___truncxfhf2
; X64-F16C-NEXT:    vpextrw $0, %xmm0, _a(%rip)
; X64-F16C-NEXT:    addq $24, %rsp
; X64-F16C-NEXT:    retq
  %2 = tail call half @llvm.experimental.constrained.fptrunc.f16.f80(x86_fp80 %0, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %2, ptr @a, align 2
  ret void
}

define void @add() strictfp {
; X32-NOF16C-LABEL: add:
; X32-NOF16C:       ## %bb.0:
; X32-NOF16C-NEXT:    subl $12, %esp
; X32-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X32-NOF16C-NEXT:    movzwl _a, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    fstps {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Spill
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    movzwl _b, %eax
; X32-NOF16C-NEXT:    movl %eax, (%esp)
; X32-NOF16C-NEXT:    calll ___extendhfsf2
; X32-NOF16C-NEXT:    flds {{[-0-9]+}}(%e{{[sb]}}p) ## 4-byte Folded Reload
; X32-NOF16C-NEXT:    faddp %st, %st(1)
; X32-NOF16C-NEXT:    fstps (%esp)
; X32-NOF16C-NEXT:    wait
; X32-NOF16C-NEXT:    calll ___truncsfhf2
; X32-NOF16C-NEXT:    movw %ax, _c
; X32-NOF16C-NEXT:    addl $12, %esp
; X32-NOF16C-NEXT:    retl
;
; X32-F16C-LABEL: add:
; X32-F16C:       ## %bb.0:
; X32-F16C-NEXT:    movzwl _a, %eax
; X32-F16C-NEXT:    vmovd %eax, %xmm0
; X32-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X32-F16C-NEXT:    movzwl _b, %eax
; X32-F16C-NEXT:    vmovd %eax, %xmm1
; X32-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; X32-F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; X32-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X32-F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X32-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X32-F16C-NEXT:    vmovd %xmm0, %eax
; X32-F16C-NEXT:    movw %ax, _c
; X32-F16C-NEXT:    retl
;
; X64-NOF16C-LABEL: add:
; X64-NOF16C:       ## %bb.0:
; X64-NOF16C-NEXT:    pushq %rax
; X64-NOF16C-NEXT:    .cfi_def_cfa_offset 16
; X64-NOF16C-NEXT:    pinsrw $0, _a(%rip), %xmm0
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    movd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) ## 4-byte Folded Spill
; X64-NOF16C-NEXT:    pinsrw $0, _b(%rip), %xmm0
; X64-NOF16C-NEXT:    callq ___extendhfsf2
; X64-NOF16C-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 ## 4-byte Folded Reload
; X64-NOF16C-NEXT:    callq ___truncsfhf2
; X64-NOF16C-NEXT:    pextrw $0, %xmm0, %eax
; X64-NOF16C-NEXT:    movw %ax, _c(%rip)
; X64-NOF16C-NEXT:    popq %rax
; X64-NOF16C-NEXT:    retq
;
; X64-F16C-LABEL: add:
; X64-F16C:       ## %bb.0:
; X64-F16C-NEXT:    movzwl _a(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm0
; X64-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; X64-F16C-NEXT:    movzwl _b(%rip), %eax
; X64-F16C-NEXT:    vmovd %eax, %xmm1
; X64-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; X64-F16C-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; X64-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; X64-F16C-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1,2,3]
; X64-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; X64-F16C-NEXT:    vmovd %xmm0, %eax
; X64-F16C-NEXT:    movw %ax, _c(%rip)
; X64-F16C-NEXT:    retq
  %1 = load half, ptr @a, align 2
  %2 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %1, metadata !"fpexcept.strict") #0
  %3 = load half, ptr @b, align 2
  %4 = tail call float @llvm.experimental.constrained.fpext.f32.f16(half %3, metadata !"fpexcept.strict") #0
  %5 = tail call float @llvm.experimental.constrained.fadd.f32(float %2, float %4, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  %6 = tail call half @llvm.experimental.constrained.fptrunc.f16.f32(float %5, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
  store half %6, ptr @c, align 2
  ret void
}

declare float @llvm.experimental.constrained.fpext.f32.f16(half, metadata)
declare double @llvm.experimental.constrained.fpext.f64.f16(half, metadata)
declare x86_fp80 @llvm.experimental.constrained.fpext.f80.f16(half, metadata)
declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f32(float, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f64(double, metadata, metadata)
declare half @llvm.experimental.constrained.fptrunc.f16.f80(x86_fp80, metadata, metadata)

attributes #0 = { strictfp }