; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \
; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON,BWON-NOF16C
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=0 \
; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \
; RUN: | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0 \
; RUN: | FileCheck %s -check-prefixes=CHECK-I686

; Tests x86 lowering of IR `half` loads, stores, bitcasts and conversions.
; The llc invocations above cover four configurations:
;   * x86-64 without f16c, -fixup-byte-word-insts=1
;   * x86-64 without f16c, -fixup-byte-word-insts=0
;   * x86-64 with f16c,    -fixup-byte-word-insts=1
;   * i686 with sse2,      -fixup-byte-word-insts=0
; The assertion comments inside each function are autogenerated (see the NOTE
; on the first line); they are presumably meant to be regenerated with that
; script rather than edited by hand.

; Copies one half value from %in to %out. No floating-point operation is
; involved: every configuration is expected to use a plain 16-bit integer
; load and store. Only the load form differs (movzwl when
; -fixup-byte-word-insts=1, movw when it is 0).
define void @test_load_store(half* %in, half* %out) #0 {
; BWON-LABEL: test_load_store:
; BWON: # %bb.0:
; BWON-NEXT: movzwl (%rdi), %eax
; BWON-NEXT: movw %ax, (%rsi)
; BWON-NEXT: retq
;
; BWOFF-LABEL: test_load_store:
; BWOFF: # %bb.0:
; BWOFF-NEXT: movw (%rdi), %ax
; BWOFF-NEXT: movw %ax, (%rsi)
; BWOFF-NEXT: retq
;
; CHECK-I686-LABEL: test_load_store:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-I686-NEXT: movw (%ecx), %cx
; CHECK-I686-NEXT: movw %cx, (%eax)
; CHECK-I686-NEXT: retl
  %val = load half, half* %in
  store half %val, half* %out
  ret void
}

; Loads a half and reinterprets its bits as i16. Expected to lower to a single
; 16-bit load into the return register, with no conversion.
define i16 @test_bitcast_from_half(half* %addr) #0 {
; BWON-LABEL: test_bitcast_from_half:
; BWON: # %bb.0:
; BWON-NEXT: movzwl (%rdi), %eax
; BWON-NEXT: retq
;
; BWOFF-LABEL: test_bitcast_from_half:
; BWOFF: # %bb.0:
; BWOFF-NEXT: movw (%rdi), %ax
; BWOFF-NEXT: retq
;
; CHECK-I686-LABEL: test_bitcast_from_half:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movw (%eax), %ax
; CHECK-I686-NEXT: retl
  %val = load half, half* %addr
  %val_int = bitcast half %val to i16
  ret i16 %val_int
}

; Reinterprets an i16 argument as half and stores it. All three x86-64
; configurations share one expected body: a single movw store.
define void @test_bitcast_to_half(half* %addr, i16 %in) #0 {
; CHECK-LABEL: test_bitcast_to_half:
; CHECK: # %bb.0:
; CHECK-NEXT: movw %si, (%rdi)
; CHECK-NEXT: retq
;
; CHECK-I686-LABEL: test_bitcast_to_half:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: movw {{[0-9]+}}(%esp), %ax
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx
; CHECK-I686-NEXT: movw %ax, (%ecx)
; CHECK-I686-NEXT: retl
  %val_fp = bitcast i16 %in to half
  store half %val_fp, half* %addr
  ret void
}

; Extends a loaded half to float. Without f16c the expected code calls
; __gnu_h2f_ieee (as a tail call on x86-64, a regular call on i686); with f16c
; it uses vcvtph2ps.
define float @test_extend32(half* %addr) #0 {
; CHECK-LIBCALL-LABEL: test_extend32:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: jmp __gnu_h2f_ieee@PLT # TAILCALL
;
; BWON-F16C-LABEL: test_extend32:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: movzwl (%rdi), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_extend32:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: subl $12, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movzwl (%eax), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: addl $12, %esp
; CHECK-I686-NEXT: retl
  %val16 = load half, half* %addr
  %val32 = fpext half %val16 to float
  ret float %val32
}

; Extends a loaded half to double. On x86-64 the expected code first produces
; a float (libcall or vcvtph2ps) and then widens it with cvtss2sd/vcvtss2sd.
; The i686 expectations show only the __gnu_h2f_ieee call, with no separate
; widening instruction. Note that %val32 holds the double result despite its
; name.
define double @test_extend64(half* %addr) #0 {
; CHECK-LIBCALL-LABEL: test_extend64:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-LIBCALL-NEXT: popq %rax
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: test_extend64:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: movzwl (%rdi), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_extend64:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: subl $12, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movzwl (%eax), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: addl $12, %esp
; CHECK-I686-NEXT: retl
  %val16 = load half, half* %addr
  %val32 = fpext half %val16 to double
  ret double %val32
}

; Truncates a float argument to half and stores it to %addr. Without f16c the
; expected code calls __gnu_f2h_ieee and stores the 16-bit result; with f16c
; it uses vcvtps2ph $4 followed by vpextrw directly to memory.
define void @test_trunc32(float %in, half* %addr) #0 {
; CHECK-LIBCALL-LABEL: test_trunc32:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: pushq %rbx
; CHECK-LIBCALL-NEXT: movq %rdi, %rbx
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee@PLT
; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT: popq %rbx
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: test_trunc32:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rdi)
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_trunc32:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: subl $8, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: movw %ax, (%esi)
; CHECK-I686-NEXT: addl $8, %esp
; CHECK-I686-NEXT: popl %esi
; CHECK-I686-NEXT: retl
  %val16 = fptrunc float %in to half
  store half %val16, half* %addr
  ret void
}

; Truncates a double argument to half and stores it to %addr. Every
; configuration, including the one with f16c, is expected to call
; __truncdfhf2 (the three x86-64 configurations share one expected body).
define void @test_trunc64(double %in, half* %addr) #0 {
; CHECK-LABEL: test_trunc64:
; CHECK: # %bb.0:
; CHECK-NEXT: pushq %rbx
; CHECK-NEXT: movq %rdi, %rbx
; CHECK-NEXT: callq __truncdfhf2@PLT
; CHECK-NEXT: movw %ax, (%rbx)
; CHECK-NEXT: popq %rbx
; CHECK-NEXT: retq
;
; CHECK-I686-LABEL: test_trunc64:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: subl $8, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT: movsd %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncdfhf2
; CHECK-I686-NEXT: movw %ax, (%esi)
; CHECK-I686-NEXT: addl $8, %esp
; CHECK-I686-NEXT: popl %esi
; CHECK-I686-NEXT: retl
  %val16 = fptrunc double %in to half
  store half %val16, half* %addr
  ret void
}

; Converts a loaded half to a signed i64. On x86-64 the half is first extended
; to float (libcall or vcvtph2ps) and then converted with cvttss2si. On i686
; the expected code goes through the x87 unit: fistpll executed between an
; fnstcw/fldcw pair that temporarily ORs 0xC00 into the control word.
define i64 @test_fptosi_i64(half* %p) #0 {
; CHECK-LIBCALL-LABEL: test_fptosi_i64:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: test_fptosi_i64:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: movzwl (%rdi), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_fptosi_i64:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: subl $28, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movzwl (%eax), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: orl $3072, %eax # imm = 0xC00
; CHECK-I686-NEXT: movw %ax, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fistpll {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %edx
; CHECK-I686-NEXT: addl $28, %esp
; CHECK-I686-NEXT: retl
  %a = load half, half* %p, align 2
  %r = fptosi half %a to i64
  ret i64 %r
}
; Converts a signed i64 to half and stores it to %p. The expected code first
; produces a float (cvtsi2ss/vcvtsi2ss on x86-64, x87 fildll + fstps on i686)
; and then narrows it to half (__gnu_f2h_ieee, or vcvtps2ph with f16c).
define void @test_sitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LIBCALL-LABEL: test_sitofp_i64:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: pushq %rbx
; CHECK-LIBCALL-NEXT: movq %rsi, %rbx
; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee@PLT
; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT: popq %rbx
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: test_sitofp_i64:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi)
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_sitofp_i64:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: subl $24, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: movw %ax, (%esi)
; CHECK-I686-NEXT: addl $24, %esp
; CHECK-I686-NEXT: popl %esi
; CHECK-I686-NEXT: retl
  %r = sitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; Converts a loaded half to an unsigned i64. After extending to float, the
; expected x86-64 code performs two signed conversions (one on the value, one
; after subtracting a constant-pool float) and merges them with sar/and/or.
; The expected i686 code compares against a constant, conditionally subtracts
; it, converts through x87 fistpll and xors bit 31 of the high word with the
; comparison result.
define i64 @test_fptoui_i64(half* %p) #0 {
; CHECK-LIBCALL-LABEL: test_fptoui_i64:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rcx
; CHECK-LIBCALL-NEXT: movq %rcx, %rdx
; CHECK-LIBCALL-NEXT: sarq $63, %rdx
; CHECK-LIBCALL-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0
; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax
; CHECK-LIBCALL-NEXT: andq %rdx, %rax
; CHECK-LIBCALL-NEXT: orq %rcx, %rax
; CHECK-LIBCALL-NEXT: popq %rcx
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: test_fptoui_i64:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: movzwl (%rdi), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vcvttss2si %xmm0, %rcx
; BWON-F16C-NEXT: movq %rcx, %rdx
; BWON-F16C-NEXT: sarq $63, %rdx
; BWON-F16C-NEXT: vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax
; BWON-F16C-NEXT: andq %rdx, %rax
; BWON-F16C-NEXT: orq %rcx, %rax
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_fptoui_i64:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: subl $28, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movzwl (%eax), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0
; CHECK-I686-NEXT: jae .LBB9_2
; CHECK-I686-NEXT: # %bb.1:
; CHECK-I686-NEXT: xorps %xmm1, %xmm1
; CHECK-I686-NEXT: .LBB9_2:
; CHECK-I686-NEXT: subss %xmm1, %xmm0
; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: setae %al
; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fnstcw {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; CHECK-I686-NEXT: orl $3072, %ecx # imm = 0xC00
; CHECK-I686-NEXT: movw %cx, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fistpll {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movzbl %al, %edx
; CHECK-I686-NEXT: shll $31, %edx
; CHECK-I686-NEXT: xorl {{[0-9]+}}(%esp), %edx
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: addl $28, %esp
; CHECK-I686-NEXT: retl
  %a = load half, half* %p, align 2
  %r = fptoui half %a to i64
  ret i64 %r
}

; Converts an unsigned i64 to half and stores it to %p. The expected x86-64
; code branches on the sign bit (testq/js): the non-negative path converts
; directly, the other path halves the value while keeping the low bit
; (shrq, andl $1, orq), converts, and doubles the result. The expected i686
; code uses fildll plus an fadds from a constant table indexed by the top bit
; of the value (shrl $31). The float is then narrowed to half (libcall or
; vcvtps2ph).
define void @test_uitofp_i64(i64 %a, half* %p) #0 {
; CHECK-LIBCALL-LABEL: test_uitofp_i64:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: pushq %rbx
; CHECK-LIBCALL-NEXT: movq %rsi, %rbx
; CHECK-LIBCALL-NEXT: testq %rdi, %rdi
; CHECK-LIBCALL-NEXT: js .LBB10_1
; CHECK-LIBCALL-NEXT: # %bb.2:
; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0
; CHECK-LIBCALL-NEXT: jmp .LBB10_3
; CHECK-LIBCALL-NEXT: .LBB10_1:
; CHECK-LIBCALL-NEXT: movq %rdi, %rax
; CHECK-LIBCALL-NEXT: shrq %rax
; CHECK-LIBCALL-NEXT: andl $1, %edi
; CHECK-LIBCALL-NEXT: orq %rax, %rdi
; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0
; CHECK-LIBCALL-NEXT: addss %xmm0, %xmm0
; CHECK-LIBCALL-NEXT: .LBB10_3:
; CHECK-LIBCALL-NEXT: callq __gnu_f2h_ieee@PLT
; CHECK-LIBCALL-NEXT: movw %ax, (%rbx)
; CHECK-LIBCALL-NEXT: popq %rbx
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: test_uitofp_i64:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: testq %rdi, %rdi
; BWON-F16C-NEXT: js .LBB10_1
; BWON-F16C-NEXT: # %bb.2:
; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT: jmp .LBB10_3
; BWON-F16C-NEXT: .LBB10_1:
; BWON-F16C-NEXT: movq %rdi, %rax
; BWON-F16C-NEXT: shrq %rax
; BWON-F16C-NEXT: andl $1, %edi
; BWON-F16C-NEXT: orq %rax, %rdi
; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0
; BWON-F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT: .LBB10_3:
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi)
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_uitofp_i64:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: subl $24, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: shrl $31, %eax
; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4)
; CHECK-I686-NEXT: fstps (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: movw %ax, (%esi)
; CHECK-I686-NEXT: addl $24, %esp
; CHECK-I686-NEXT: popl %esi
; CHECK-I686-NEXT: retl
  %r = uitofp i64 %a to half
  store half %r, half* %p
  ret void
}

; Extends a loaded <4 x half> to <4 x float>. Without f16c the expected code
; extracts each element with pextrw and calls __gnu_h2f_ieee four times,
; reassembling the vector afterwards; with f16c a single vcvtph2ps from memory
; is expected.
define <4 x float> @test_extend32_vec4(<4 x half>* %p) #0 {
; CHECK-LIBCALL-LABEL: test_extend32_vec4:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: subq $88, %rsp
; CHECK-LIBCALL-NEXT: movl (%rdi), %eax
; CHECK-LIBCALL-NEXT: movl 4(%rdi), %ecx
; CHECK-LIBCALL-NEXT: movl %eax, (%rsp)
; CHECK-LIBCALL-NEXT: movl %ecx, {{[0-9]+}}(%rsp)
; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0
; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT: movdqa {{[0-9]+}}(%rsp), %xmm0
; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT: pextrw $1, %xmm0, %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT: pextrw $1, %xmm0, %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT: punpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
; CHECK-LIBCALL-NEXT: punpcklqdq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload
; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-LIBCALL-NEXT: addq $88, %rsp
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: test_extend32_vec4:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vcvtph2ps (%rdi), %xmm0
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_extend32_vec4:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: subl $124, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movl (%eax), %ecx
; CHECK-I686-NEXT: movl 4(%eax), %eax
; CHECK-I686-NEXT: movl %eax, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movl %ecx, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movaps {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT: movdqa {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT: pextrw $1, %xmm0, %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: pextrw $1, %xmm0, %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; CHECK-I686-NEXT: addl $124, %esp
; CHECK-I686-NEXT: retl
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x float>
  ret <4 x float> %b
}

; Extends a loaded <4 x half> to <4 x double>. Without f16c the expected code
; loads the four elements with movzwl and calls __gnu_h2f_ieee once per
; element (on x86-64 each call is followed by cvtss2sd); with f16c it is
; vcvtph2ps from memory followed by vcvtps2pd.
define <4 x double> @test_extend64_vec4(<4 x half>* %p) #0 {
; CHECK-LIBCALL-LABEL: test_extend64_vec4:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: pushq %rbp
; CHECK-LIBCALL-NEXT: pushq %r14
; CHECK-LIBCALL-NEXT: pushq %rbx
; CHECK-LIBCALL-NEXT: subq $32, %rsp
; CHECK-LIBCALL-NEXT: movzwl 4(%rdi), %r14d
; CHECK-LIBCALL-NEXT: movzwl 6(%rdi), %ebp
; CHECK-LIBCALL-NEXT: movzwl (%rdi), %ebx
; CHECK-LIBCALL-NEXT: movzwl 2(%rdi), %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-LIBCALL-NEXT: movl %ebx, %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-LIBCALL-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload
; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0]
; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; CHECK-LIBCALL-NEXT: movl %ebp, %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0
; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT: movl %r14d, %edi
; CHECK-LIBCALL-NEXT: callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm1
; CHECK-LIBCALL-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload
; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0],mem[0]
; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT: addq $32, %rsp
; CHECK-LIBCALL-NEXT: popq %rbx
; CHECK-LIBCALL-NEXT: popq %r14
; CHECK-LIBCALL-NEXT: popq %rbp
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: test_extend64_vec4:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vcvtph2ps (%rdi), %xmm0
; BWON-F16C-NEXT: vcvtps2pd %xmm0, %ymm0
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_extend64_vec4:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: pushl %ebx
; CHECK-I686-NEXT: pushl %edi
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: subl $64, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movzwl 6(%eax), %esi
; CHECK-I686-NEXT: movzwl (%eax), %edi
; CHECK-I686-NEXT: movzwl 2(%eax), %ebx
; CHECK-I686-NEXT: movzwl 4(%eax), %eax
; CHECK-I686-NEXT: movl %eax, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT: movl %ebx, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill
; CHECK-I686-NEXT: movl %edi, (%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: movl %esi, (%esp)
; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload
; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: calll __gnu_h2f_ieee
; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero
; CHECK-I686-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
; CHECK-I686-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero
; CHECK-I686-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1]
; CHECK-I686-NEXT: addl $64, %esp
; CHECK-I686-NEXT: popl %esi
; CHECK-I686-NEXT: popl %edi
; CHECK-I686-NEXT: popl %ebx
; CHECK-I686-NEXT: retl
  %a = load <4 x half>, <4 x half>* %p, align 8
  %b = fpext <4 x half> %a to <4 x float>
  ret <4 x double> %b
}

; Truncates a <4 x float> argument to <4 x half> and stores it to %p. Without
; f16c the expected code calls __gnu_f2h_ieee once per element and writes the
; four 16-bit results individually; the two x86-64 non-f16c configurations
; differ only in how the intermediate results are copied (movl of the 32-bit
; register versus movw of the 16-bit register). With f16c a single
; vcvtps2ph $4 to memory is expected.
define void @test_trunc32_vec4(<4 x float> %a, <4 x half>* %p) #0 {
; BWON-NOF16C-LABEL: test_trunc32_vec4:
; BWON-NOF16C: # %bb.0:
; BWON-NOF16C-NEXT: pushq %rbp
; BWON-NOF16C-NEXT: pushq %r15
; BWON-NOF16C-NEXT: pushq %r14
; BWON-NOF16C-NEXT: pushq %rbx
; BWON-NOF16C-NEXT: subq $24, %rsp
; BWON-NOF16C-NEXT: movq %rdi, %rbx
; BWON-NOF16C-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; BWON-NOF16C-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; BWON-NOF16C-NEXT: callq __gnu_f2h_ieee@PLT
; BWON-NOF16C-NEXT: movl %eax, %r14d
; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWON-NOF16C-NEXT: callq __gnu_f2h_ieee@PLT
; BWON-NOF16C-NEXT: movl %eax, %r15d
; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; BWON-NOF16C-NEXT: callq __gnu_f2h_ieee@PLT
; BWON-NOF16C-NEXT: movl %eax, %ebp
; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT: callq __gnu_f2h_ieee@PLT
; BWON-NOF16C-NEXT: movw %ax, (%rbx)
; BWON-NOF16C-NEXT: movw %bp, 6(%rbx)
; BWON-NOF16C-NEXT: movw %r15w, 4(%rbx)
; BWON-NOF16C-NEXT: movw %r14w, 2(%rbx)
; BWON-NOF16C-NEXT: addq $24, %rsp
; BWON-NOF16C-NEXT: popq %rbx
; BWON-NOF16C-NEXT: popq %r14
; BWON-NOF16C-NEXT: popq %r15
; BWON-NOF16C-NEXT: popq %rbp
; BWON-NOF16C-NEXT: retq
;
; BWOFF-LABEL: test_trunc32_vec4:
; BWOFF: # %bb.0:
; BWOFF-NEXT: pushq %rbp
; BWOFF-NEXT: pushq %r15
; BWOFF-NEXT: pushq %r14
; BWOFF-NEXT: pushq %rbx
; BWOFF-NEXT: subq $24, %rsp
; BWOFF-NEXT: movq %rdi, %rbx
; BWOFF-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill
; BWOFF-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1]
; BWOFF-NEXT: callq __gnu_f2h_ieee@PLT
; BWOFF-NEXT: movw %ax, %r14w
; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWOFF-NEXT: callq __gnu_f2h_ieee@PLT
; BWOFF-NEXT: movw %ax, %r15w
; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; BWOFF-NEXT: callq __gnu_f2h_ieee@PLT
; BWOFF-NEXT: movw %ax, %bp
; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT: callq __gnu_f2h_ieee@PLT
; BWOFF-NEXT: movw %ax, (%rbx)
; BWOFF-NEXT: movw %bp, 6(%rbx)
; BWOFF-NEXT: movw %r15w, 4(%rbx)
; BWOFF-NEXT: movw %r14w, 2(%rbx)
; BWOFF-NEXT: addq $24, %rsp
; BWOFF-NEXT: popq %rbx
; BWOFF-NEXT: popq %r14
; BWOFF-NEXT: popq %r15
; BWOFF-NEXT: popq %rbp
; BWOFF-NEXT: retq
;
; BWON-F16C-LABEL: test_trunc32_vec4:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, (%rdi)
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_trunc32_vec4:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: pushl %ebp
; CHECK-I686-NEXT: pushl %ebx
; CHECK-I686-NEXT: pushl %edi
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: subl $44, %esp
; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-I686-NEXT: movaps %xmm0, %xmm1
; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1]
; CHECK-I686-NEXT: movss %xmm1, (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: movw %ax, %si
; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: movw %ax, %di
; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3]
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: movw %ax, %bx
; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __gnu_f2h_ieee
; CHECK-I686-NEXT: movw %ax, (%ebp)
; CHECK-I686-NEXT: movw %bx, 6(%ebp)
; CHECK-I686-NEXT: movw %di, 4(%ebp)
; CHECK-I686-NEXT: movw %si, 2(%ebp)
; CHECK-I686-NEXT: addl $44, %esp
; CHECK-I686-NEXT: popl %esi
; CHECK-I686-NEXT: popl %edi
; CHECK-I686-NEXT: popl %ebx
; CHECK-I686-NEXT: popl %ebp
; CHECK-I686-NEXT: retl
  %v = fptrunc <4 x float> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

; Truncates a <4 x double> argument to <4 x half> and stores it to %p. Every
; configuration, including the one with f16c, is expected to call
; __truncdfhf2 once per element and store the four 16-bit results
; individually.
define void @test_trunc64_vec4(<4 x double> %a, <4 x half>* %p) #0 {
; BWON-NOF16C-LABEL: test_trunc64_vec4:
; BWON-NOF16C: # %bb.0:
; BWON-NOF16C-NEXT: pushq %rbp
; BWON-NOF16C-NEXT: pushq %r15
; BWON-NOF16C-NEXT: pushq %r14
; BWON-NOF16C-NEXT: pushq %rbx
; BWON-NOF16C-NEXT: subq $40, %rsp
; BWON-NOF16C-NEXT: movq %rdi, %rbx
; BWON-NOF16C-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
; BWON-NOF16C-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWON-NOF16C-NEXT: callq __truncdfhf2@PLT
; BWON-NOF16C-NEXT: movl %eax, %r14d
; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWON-NOF16C-NEXT: callq __truncdfhf2@PLT
; BWON-NOF16C-NEXT: movl %eax, %r15d
; BWON-NOF16C-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT: callq __truncdfhf2@PLT
; BWON-NOF16C-NEXT: movl %eax, %ebp
; BWON-NOF16C-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; BWON-NOF16C-NEXT: callq __truncdfhf2@PLT
; BWON-NOF16C-NEXT: movw %ax, 4(%rbx)
; BWON-NOF16C-NEXT: movw %bp, (%rbx)
; BWON-NOF16C-NEXT: movw %r15w, 6(%rbx)
; BWON-NOF16C-NEXT: movw %r14w, 2(%rbx)
; BWON-NOF16C-NEXT: addq $40, %rsp
; BWON-NOF16C-NEXT: popq %rbx
; BWON-NOF16C-NEXT: popq %r14
; BWON-NOF16C-NEXT: popq %r15
; BWON-NOF16C-NEXT: popq %rbp
; BWON-NOF16C-NEXT: retq
;
; BWOFF-LABEL: test_trunc64_vec4:
; BWOFF: # %bb.0:
; BWOFF-NEXT: pushq %rbp
; BWOFF-NEXT: pushq %r15
; BWOFF-NEXT: pushq %r14
; BWOFF-NEXT: pushq %rbx
; BWOFF-NEXT: subq $40, %rsp
; BWOFF-NEXT: movq %rdi, %rbx
; BWOFF-NEXT: movaps %xmm1, (%rsp) # 16-byte Spill
; BWOFF-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWOFF-NEXT: callq __truncdfhf2@PLT
; BWOFF-NEXT: movw %ax, %r14w
; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1]
; BWOFF-NEXT: callq __truncdfhf2@PLT
; BWOFF-NEXT: movw %ax, %r15w
; BWOFF-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; BWOFF-NEXT: callq __truncdfhf2@PLT
; BWOFF-NEXT: movw %ax, %bp
; BWOFF-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload
; BWOFF-NEXT: callq __truncdfhf2@PLT
; BWOFF-NEXT: movw %ax, 4(%rbx)
; BWOFF-NEXT: movw %bp, (%rbx)
; BWOFF-NEXT: movw %r15w, 6(%rbx)
; BWOFF-NEXT: movw %r14w, 2(%rbx)
; BWOFF-NEXT: addq $40, %rsp
; BWOFF-NEXT: popq %rbx
; BWOFF-NEXT: popq %r14
; BWOFF-NEXT: popq %r15
; BWOFF-NEXT: popq %rbp
; BWOFF-NEXT: retq
;
; BWON-F16C-LABEL: test_trunc64_vec4:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: pushq %rbp
; BWON-F16C-NEXT: pushq %r15
; BWON-F16C-NEXT: pushq %r14
; BWON-F16C-NEXT: pushq %rbx
; BWON-F16C-NEXT: subq $56, %rsp
; BWON-F16C-NEXT: movq %rdi, %rbx
; BWON-F16C-NEXT: vmovupd %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill
; BWON-F16C-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; BWON-F16C-NEXT: vzeroupper
; BWON-F16C-NEXT: callq __truncdfhf2@PLT
; BWON-F16C-NEXT: movl %eax, %r14d
; BWON-F16C-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; BWON-F16C-NEXT: vextractf128 $1, %ymm0, %xmm0
; BWON-F16C-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill
; BWON-F16C-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; BWON-F16C-NEXT: vzeroupper
; BWON-F16C-NEXT: callq __truncdfhf2@PLT
; BWON-F16C-NEXT: movl %eax, %r15d
; BWON-F16C-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload
; BWON-F16C-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0
; BWON-F16C-NEXT: vzeroupper
; BWON-F16C-NEXT: callq __truncdfhf2@PLT
; BWON-F16C-NEXT: movl %eax, %ebp
; BWON-F16C-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload
; BWON-F16C-NEXT: callq __truncdfhf2@PLT
; BWON-F16C-NEXT: movw %ax, 4(%rbx)
; BWON-F16C-NEXT: movw %bp, (%rbx)
; BWON-F16C-NEXT: movw %r15w, 6(%rbx)
; BWON-F16C-NEXT: movw %r14w, 2(%rbx)
; BWON-F16C-NEXT: addq $56, %rsp
; BWON-F16C-NEXT: popq %rbx
; BWON-F16C-NEXT: popq %r14
; BWON-F16C-NEXT: popq %r15
; BWON-F16C-NEXT: popq %rbp
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_trunc64_vec4:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: pushl %ebp
; CHECK-I686-NEXT: pushl %ebx
; CHECK-I686-NEXT: pushl %edi
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: subl $60, %esp
; CHECK-I686-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ebp
; CHECK-I686-NEXT: movlps %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncdfhf2
; CHECK-I686-NEXT: movw %ax, %si
; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: movhps %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncdfhf2
; CHECK-I686-NEXT: movw %ax, %di
; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: movlps %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncdfhf2
; CHECK-I686-NEXT: movw %ax, %bx
; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: movhps %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncdfhf2
; CHECK-I686-NEXT: movw %ax, 6(%ebp)
; CHECK-I686-NEXT: movw %bx, 4(%ebp)
; CHECK-I686-NEXT: movw %di, 2(%ebp)
; CHECK-I686-NEXT: movw %si, (%ebp)
; CHECK-I686-NEXT: addl $60, %esp
; CHECK-I686-NEXT: popl %esi
; CHECK-I686-NEXT: popl %edi
; CHECK-I686-NEXT: popl %ebx
; CHECK-I686-NEXT: popl %ebp
; CHECK-I686-NEXT: retl
  %v = fptrunc <4 x double> %a to <4 x half>
  store <4 x half> %v, <4 x half>* %p
  ret void
}

; External function returning a float; called by @test_f80trunc_nodagcombine.
declare float @test_floatret();

; On i686, if SSE2 is available, the return value from test_floatret is loaded
; to f80 and then rounded to f32. The DAG combiner should not combine this
; fp_round and the subsequent fptrunc from float to half.
; What the expected output below demonstrates: in the i686 run the x87 return
; value is first stored with fstps (giving an f32 in memory) and only then
; handed to the float-to-half libcall; in the x86-64 runs the float result of
; the call is converted directly (libcall, or vcvtps2ph when F16C is enabled).
define half @test_f80trunc_nodagcombine() #0 {
; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    callq test_floatret@PLT
; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee@PLT
; CHECK-LIBCALL-NEXT:    popq %rcx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    pushq %rax
; BWON-F16C-NEXT:    callq test_floatret@PLT
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovd %xmm0, %eax
; BWON-F16C-NEXT:    # kill: def $ax killed $ax killed $eax
; BWON-F16C-NEXT:    popq %rcx
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    calll test_floatret@PLT
; CHECK-I686-NEXT:    fstps (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %1 = call float @test_floatret()
  %2 = fptrunc float %1 to half
  ret half %2
}

; Loads a half from %b, converts the i32 %a to half with sitofp, adds the two
; values as half, and returns the sum extended to float.
;
; The expected output keeps both half roundings explicit: the converted
; integer goes float -> half -> float before the add, and the sum goes
; float -> half -> float again before it is returned. Without F16C these
; round trips are the __gnu_f2h_ieee / __gnu_h2f_ieee libcalls; with F16C they
; are vcvtps2ph / vcvtph2ps pairs.
define float @test_sitofp_fadd_i32(i32 %a, half* %b) #0 {
; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rbx
; CHECK-LIBCALL-NEXT:    subq $16, %rsp
; CHECK-LIBCALL-NEXT:    movzwl (%rsi), %ebx
; CHECK-LIBCALL-NEXT:    cvtsi2ss %edi, %xmm0
; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee@PLT
; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT:    movl %ebx, %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-LIBCALL-NEXT:    callq __gnu_f2h_ieee@PLT
; CHECK-LIBCALL-NEXT:    movzwl %ax, %edi
; CHECK-LIBCALL-NEXT:    addq $16, %rsp
; CHECK-LIBCALL-NEXT:    popq %rbx
; CHECK-LIBCALL-NEXT:    jmp __gnu_h2f_ieee@PLT # TAILCALL
;
; BWON-F16C-LABEL: test_sitofp_fadd_i32:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movzwl (%rsi), %eax
; BWON-F16C-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovd %eax, %xmm1
; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_sitofp_fadd_i32:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    pushl %edi
; CHECK-I686-NEXT:    pushl %esi
; CHECK-I686-NEXT:    subl $20, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movzwl (%eax), %edi
; CHECK-I686-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movw %ax, %si
; CHECK-I686-NEXT:    movl %edi, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    movzwl %si, %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __gnu_f2h_ieee
; CHECK-I686-NEXT:    movzwl %ax, %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    addl $20, %esp
; CHECK-I686-NEXT:    popl %esi
; CHECK-I686-NEXT:    popl %edi
; CHECK-I686-NEXT:    retl
  %tmp0 = load half, half* %b
  %tmp1 = sitofp i32 %a to half
  %tmp2 = fadd half %tmp0, %tmp1
  %tmp3 = fpext half %tmp2 to float
  ret float %tmp3
}

; Compares the half argument against zero with `fcmp une` and converts the
; resulting i1 to half with uitofp.
;
; In every run the expected output extends the argument to float, compares it
; with zero using (v)ucomiss, and then selects between 0 and 0x3C00 with
; cmovne plus cmovp (the parity case covers an unordered compare). 0x3C00 is
; the IEEE half-precision bit pattern for 1.0, so no integer-to-float
; conversion appears in the output.
; NOTE(review): the name suggests this is a regression test for bug report
; PR40273 - confirm against the bug tracker before relying on that.
define half @PR40273(half) #0 {
; CHECK-LIBCALL-LABEL: PR40273:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    movzwl %di, %edi
; CHECK-LIBCALL-NEXT:    callq __gnu_h2f_ieee@PLT
; CHECK-LIBCALL-NEXT:    xorl %eax, %eax
; CHECK-LIBCALL-NEXT:    xorps %xmm1, %xmm1
; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
; CHECK-LIBCALL-NEXT:    movl $15360, %ecx # imm = 0x3C00
; CHECK-LIBCALL-NEXT:    cmovnel %ecx, %eax
; CHECK-LIBCALL-NEXT:    cmovpl %ecx, %eax
; CHECK-LIBCALL-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-LIBCALL-NEXT:    popq %rcx
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: PR40273:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movzwl %di, %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    xorl %eax, %eax
; BWON-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
; BWON-F16C-NEXT:    movl $15360, %ecx # imm = 0x3C00
; BWON-F16C-NEXT:    cmovnel %ecx, %eax
; BWON-F16C-NEXT:    cmovpl %ecx, %eax
; BWON-F16C-NEXT:    # kill: def $ax killed $ax killed $eax
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: PR40273:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    movl %eax, (%esp)
; CHECK-I686-NEXT:    calll __gnu_h2f_ieee
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    xorl %eax, %eax
; CHECK-I686-NEXT:    xorps %xmm1, %xmm1
; CHECK-I686-NEXT:    ucomiss %xmm1, %xmm0
; CHECK-I686-NEXT:    movl $15360, %ecx # imm = 0x3C00
; CHECK-I686-NEXT:    cmovnel %ecx, %eax
; CHECK-I686-NEXT:    cmovpl %ecx, %eax
; CHECK-I686-NEXT:    # kill: def $ax killed $ax killed $eax
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %2 = fcmp une half %0, 0xH0000
  %3 = uitofp i1 %2 to half
  ret half %3
}

; Attribute group #0, referenced by the function definitions in this file.
attributes #0 = { nounwind }