1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \ 3; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON 4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=0 \ 5; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF 6; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \ 7; RUN: | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C 8; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0 \ 9; RUN: | FileCheck %s -check-prefixes=CHECK-I686 10 11define void @test_load_store(ptr %in, ptr %out) #0 { 12; CHECK-LIBCALL-LABEL: test_load_store: 13; CHECK-LIBCALL: # %bb.0: 14; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 15; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 16; CHECK-LIBCALL-NEXT: movw %ax, (%rsi) 17; CHECK-LIBCALL-NEXT: retq 18; 19; BWON-F16C-LABEL: test_load_store: 20; BWON-F16C: # %bb.0: 21; BWON-F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 22; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi) 23; BWON-F16C-NEXT: retq 24; 25; CHECK-I686-LABEL: test_load_store: 26; CHECK-I686: # %bb.0: 27; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 28; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx 29; CHECK-I686-NEXT: pinsrw $0, (%ecx), %xmm0 30; CHECK-I686-NEXT: pextrw $0, %xmm0, %ecx 31; CHECK-I686-NEXT: movw %cx, (%eax) 32; CHECK-I686-NEXT: retl 33 %val = load half, ptr %in 34 store half %val, ptr %out 35 ret void 36} 37 38define i16 @test_bitcast_from_half(ptr %addr) #0 { 39; BWON-LABEL: test_bitcast_from_half: 40; BWON: # %bb.0: 41; BWON-NEXT: movzwl (%rdi), %eax 42; BWON-NEXT: retq 43; 44; BWOFF-LABEL: test_bitcast_from_half: 45; BWOFF: # %bb.0: 46; BWOFF-NEXT: movw (%rdi), %ax 47; BWOFF-NEXT: retq 48; 49; CHECK-I686-LABEL: test_bitcast_from_half: 50; CHECK-I686: # %bb.0: 51; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 52; CHECK-I686-NEXT: movw 
(%eax), %ax 53; CHECK-I686-NEXT: retl 54 %val = load half, ptr %addr 55 %val_int = bitcast half %val to i16 56 ret i16 %val_int 57} 58 59define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 { 60; CHECK-LABEL: test_bitcast_to_half: 61; CHECK: # %bb.0: 62; CHECK-NEXT: movw %si, (%rdi) 63; CHECK-NEXT: retq 64; 65; CHECK-I686-LABEL: test_bitcast_to_half: 66; CHECK-I686: # %bb.0: 67; CHECK-I686-NEXT: movw {{[0-9]+}}(%esp), %ax 68; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx 69; CHECK-I686-NEXT: movw %ax, (%ecx) 70; CHECK-I686-NEXT: retl 71 %val_fp = bitcast i16 %in to half 72 store half %val_fp, ptr %addr 73 ret void 74} 75 76define float @test_extend32(ptr %addr) #0 { 77; CHECK-LIBCALL-LABEL: test_extend32: 78; CHECK-LIBCALL: # %bb.0: 79; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 80; CHECK-LIBCALL-NEXT: jmp __extendhfsf2@PLT # TAILCALL 81; 82; BWON-F16C-LABEL: test_extend32: 83; BWON-F16C: # %bb.0: 84; BWON-F16C-NEXT: movzwl (%rdi), %eax 85; BWON-F16C-NEXT: vmovd %eax, %xmm0 86; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 87; BWON-F16C-NEXT: retq 88; 89; CHECK-I686-LABEL: test_extend32: 90; CHECK-I686: # %bb.0: 91; CHECK-I686-NEXT: subl $12, %esp 92; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 93; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 94; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 95; CHECK-I686-NEXT: movw %ax, (%esp) 96; CHECK-I686-NEXT: calll __extendhfsf2 97; CHECK-I686-NEXT: addl $12, %esp 98; CHECK-I686-NEXT: retl 99 %val16 = load half, ptr %addr 100 %val32 = fpext half %val16 to float 101 ret float %val32 102} 103 104define double @test_extend64(ptr %addr) #0 { 105; CHECK-LIBCALL-LABEL: test_extend64: 106; CHECK-LIBCALL: # %bb.0: 107; CHECK-LIBCALL-NEXT: pushq %rax 108; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 109; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 110; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 111; CHECK-LIBCALL-NEXT: popq %rax 112; CHECK-LIBCALL-NEXT: retq 113; 114; BWON-F16C-LABEL: test_extend64: 115; BWON-F16C: # %bb.0: 116; BWON-F16C-NEXT: 
movzwl (%rdi), %eax 117; BWON-F16C-NEXT: vmovd %eax, %xmm0 118; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 119; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 120; BWON-F16C-NEXT: retq 121; 122; CHECK-I686-LABEL: test_extend64: 123; CHECK-I686: # %bb.0: 124; CHECK-I686-NEXT: subl $12, %esp 125; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 126; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 127; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 128; CHECK-I686-NEXT: movw %ax, (%esp) 129; CHECK-I686-NEXT: calll __extendhfsf2 130; CHECK-I686-NEXT: addl $12, %esp 131; CHECK-I686-NEXT: retl 132 %val16 = load half, ptr %addr 133 %val32 = fpext half %val16 to double 134 ret double %val32 135} 136 137define void @test_trunc32(float %in, ptr %addr) #0 { 138; CHECK-LIBCALL-LABEL: test_trunc32: 139; CHECK-LIBCALL: # %bb.0: 140; CHECK-LIBCALL-NEXT: pushq %rbx 141; CHECK-LIBCALL-NEXT: movq %rdi, %rbx 142; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 143; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 144; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 145; CHECK-LIBCALL-NEXT: popq %rbx 146; CHECK-LIBCALL-NEXT: retq 147; 148; BWON-F16C-LABEL: test_trunc32: 149; BWON-F16C: # %bb.0: 150; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 151; BWON-F16C-NEXT: vmovd %xmm0, %eax 152; BWON-F16C-NEXT: movw %ax, (%rdi) 153; BWON-F16C-NEXT: retq 154; 155; CHECK-I686-LABEL: test_trunc32: 156; CHECK-I686: # %bb.0: 157; CHECK-I686-NEXT: pushl %esi 158; CHECK-I686-NEXT: subl $8, %esp 159; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 160; CHECK-I686-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 161; CHECK-I686-NEXT: movd %xmm0, (%esp) 162; CHECK-I686-NEXT: calll __truncsfhf2 163; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 164; CHECK-I686-NEXT: movw %ax, (%esi) 165; CHECK-I686-NEXT: addl $8, %esp 166; CHECK-I686-NEXT: popl %esi 167; CHECK-I686-NEXT: retl 168 %val16 = fptrunc float %in to half 169 store half %val16, ptr %addr 170 ret void 171} 172 173define void @test_trunc64(double %in, ptr %addr) #0 { 174; CHECK-LIBCALL-LABEL: 
test_trunc64: 175; CHECK-LIBCALL: # %bb.0: 176; CHECK-LIBCALL-NEXT: pushq %rbx 177; CHECK-LIBCALL-NEXT: movq %rdi, %rbx 178; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 179; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 180; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 181; CHECK-LIBCALL-NEXT: popq %rbx 182; CHECK-LIBCALL-NEXT: retq 183; 184; BWON-F16C-LABEL: test_trunc64: 185; BWON-F16C: # %bb.0: 186; BWON-F16C-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 187; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 188; BWON-F16C-NEXT: vmovd %xmm0, %eax 189; BWON-F16C-NEXT: movw %ax, (%rdi) 190; BWON-F16C-NEXT: retq 191; 192; CHECK-I686-LABEL: test_trunc64: 193; CHECK-I686: # %bb.0: 194; CHECK-I686-NEXT: pushl %esi 195; CHECK-I686-NEXT: subl $8, %esp 196; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 197; CHECK-I686-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 198; CHECK-I686-NEXT: movq %xmm0, (%esp) 199; CHECK-I686-NEXT: calll __truncdfhf2 200; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 201; CHECK-I686-NEXT: movw %ax, (%esi) 202; CHECK-I686-NEXT: addl $8, %esp 203; CHECK-I686-NEXT: popl %esi 204; CHECK-I686-NEXT: retl 205 %val16 = fptrunc double %in to half 206 store half %val16, ptr %addr 207 ret void 208} 209 210define i64 @test_fptosi_i64(ptr %p) #0 { 211; CHECK-LIBCALL-LABEL: test_fptosi_i64: 212; CHECK-LIBCALL: # %bb.0: 213; CHECK-LIBCALL-NEXT: pushq %rax 214; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 215; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 216; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax 217; CHECK-LIBCALL-NEXT: popq %rcx 218; CHECK-LIBCALL-NEXT: retq 219; 220; BWON-F16C-LABEL: test_fptosi_i64: 221; BWON-F16C: # %bb.0: 222; BWON-F16C-NEXT: movzwl (%rdi), %eax 223; BWON-F16C-NEXT: vmovd %eax, %xmm0 224; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 225; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax 226; BWON-F16C-NEXT: retq 227; 228; CHECK-I686-LABEL: test_fptosi_i64: 229; CHECK-I686: # %bb.0: 230; CHECK-I686-NEXT: subl $12, %esp 231; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 232; CHECK-I686-NEXT: 
pinsrw $0, (%eax), %xmm0 233; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 234; CHECK-I686-NEXT: movw %ax, (%esp) 235; CHECK-I686-NEXT: calll __fixhfdi 236; CHECK-I686-NEXT: addl $12, %esp 237; CHECK-I686-NEXT: retl 238 %a = load half, ptr %p, align 2 239 %r = fptosi half %a to i64 240 ret i64 %r 241} 242 243define void @test_sitofp_i64(i64 %a, ptr %p) #0 { 244; CHECK-LIBCALL-LABEL: test_sitofp_i64: 245; CHECK-LIBCALL: # %bb.0: 246; CHECK-LIBCALL-NEXT: pushq %rbx 247; CHECK-LIBCALL-NEXT: movq %rsi, %rbx 248; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0 249; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 250; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 251; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 252; CHECK-LIBCALL-NEXT: popq %rbx 253; CHECK-LIBCALL-NEXT: retq 254; 255; BWON-F16C-LABEL: test_sitofp_i64: 256; BWON-F16C: # %bb.0: 257; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 258; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 259; BWON-F16C-NEXT: vmovd %xmm0, %eax 260; BWON-F16C-NEXT: movw %ax, (%rsi) 261; BWON-F16C-NEXT: retq 262; 263; CHECK-I686-LABEL: test_sitofp_i64: 264; CHECK-I686: # %bb.0: 265; CHECK-I686-NEXT: pushl %esi 266; CHECK-I686-NEXT: subl $24, %esp 267; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 268; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 269; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) 270; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp) 271; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 272; CHECK-I686-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 273; CHECK-I686-NEXT: movd %xmm0, (%esp) 274; CHECK-I686-NEXT: calll __truncsfhf2 275; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 276; CHECK-I686-NEXT: movw %ax, (%esi) 277; CHECK-I686-NEXT: addl $24, %esp 278; CHECK-I686-NEXT: popl %esi 279; CHECK-I686-NEXT: retl 280 %r = sitofp i64 %a to half 281 store half %r, ptr %p 282 ret void 283} 284 285define i64 @test_fptoui_i64(ptr %p) #0 { 286; CHECK-LIBCALL-LABEL: test_fptoui_i64: 287; CHECK-LIBCALL: # %bb.0: 288; CHECK-LIBCALL-NEXT: pushq %rax 289; CHECK-LIBCALL-NEXT: 
pinsrw $0, (%rdi), %xmm0 290; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 291; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rcx 292; CHECK-LIBCALL-NEXT: movq %rcx, %rdx 293; CHECK-LIBCALL-NEXT: sarq $63, %rdx 294; CHECK-LIBCALL-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 295; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax 296; CHECK-LIBCALL-NEXT: andq %rdx, %rax 297; CHECK-LIBCALL-NEXT: orq %rcx, %rax 298; CHECK-LIBCALL-NEXT: popq %rcx 299; CHECK-LIBCALL-NEXT: retq 300; 301; BWON-F16C-LABEL: test_fptoui_i64: 302; BWON-F16C: # %bb.0: 303; BWON-F16C-NEXT: movzwl (%rdi), %eax 304; BWON-F16C-NEXT: vmovd %eax, %xmm0 305; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 306; BWON-F16C-NEXT: vcvttss2si %xmm0, %rcx 307; BWON-F16C-NEXT: movq %rcx, %rdx 308; BWON-F16C-NEXT: sarq $63, %rdx 309; BWON-F16C-NEXT: vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 310; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax 311; BWON-F16C-NEXT: andq %rdx, %rax 312; BWON-F16C-NEXT: orq %rcx, %rax 313; BWON-F16C-NEXT: retq 314; 315; CHECK-I686-LABEL: test_fptoui_i64: 316; CHECK-I686: # %bb.0: 317; CHECK-I686-NEXT: subl $12, %esp 318; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 319; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 320; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 321; CHECK-I686-NEXT: movw %ax, (%esp) 322; CHECK-I686-NEXT: calll __fixunshfdi 323; CHECK-I686-NEXT: addl $12, %esp 324; CHECK-I686-NEXT: retl 325 %a = load half, ptr %p, align 2 326 %r = fptoui half %a to i64 327 ret i64 %r 328} 329 330define void @test_uitofp_i64(i64 %a, ptr %p) #0 { 331; CHECK-LIBCALL-LABEL: test_uitofp_i64: 332; CHECK-LIBCALL: # %bb.0: 333; CHECK-LIBCALL-NEXT: pushq %rbx 334; CHECK-LIBCALL-NEXT: movq %rsi, %rbx 335; CHECK-LIBCALL-NEXT: testq %rdi, %rdi 336; CHECK-LIBCALL-NEXT: js .LBB10_1 337; CHECK-LIBCALL-NEXT: # %bb.2: 338; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0 339; CHECK-LIBCALL-NEXT: jmp .LBB10_3 340; CHECK-LIBCALL-NEXT: .LBB10_1: 341; CHECK-LIBCALL-NEXT: movq %rdi, %rax 342; CHECK-LIBCALL-NEXT: shrq %rax 343; 
CHECK-LIBCALL-NEXT: andl $1, %edi 344; CHECK-LIBCALL-NEXT: orq %rax, %rdi 345; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0 346; CHECK-LIBCALL-NEXT: addss %xmm0, %xmm0 347; CHECK-LIBCALL-NEXT: .LBB10_3: 348; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 349; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 350; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 351; CHECK-LIBCALL-NEXT: popq %rbx 352; CHECK-LIBCALL-NEXT: retq 353; 354; BWON-F16C-LABEL: test_uitofp_i64: 355; BWON-F16C: # %bb.0: 356; BWON-F16C-NEXT: testq %rdi, %rdi 357; BWON-F16C-NEXT: js .LBB10_1 358; BWON-F16C-NEXT: # %bb.2: 359; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 360; BWON-F16C-NEXT: jmp .LBB10_3 361; BWON-F16C-NEXT: .LBB10_1: 362; BWON-F16C-NEXT: movq %rdi, %rax 363; BWON-F16C-NEXT: shrq %rax 364; BWON-F16C-NEXT: andl $1, %edi 365; BWON-F16C-NEXT: orq %rax, %rdi 366; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 367; BWON-F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0 368; BWON-F16C-NEXT: .LBB10_3: 369; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 370; BWON-F16C-NEXT: vmovd %xmm0, %eax 371; BWON-F16C-NEXT: movw %ax, (%rsi) 372; BWON-F16C-NEXT: retq 373; 374; CHECK-I686-LABEL: test_uitofp_i64: 375; CHECK-I686: # %bb.0: 376; CHECK-I686-NEXT: pushl %esi 377; CHECK-I686-NEXT: subl $24, %esp 378; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 379; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 380; CHECK-I686-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 381; CHECK-I686-NEXT: movq %xmm0, {{[0-9]+}}(%esp) 382; CHECK-I686-NEXT: shrl $31, %eax 383; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp) 384; CHECK-I686-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) 385; CHECK-I686-NEXT: fstps (%esp) 386; CHECK-I686-NEXT: calll __truncsfhf2 387; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 388; CHECK-I686-NEXT: movw %ax, (%esi) 389; CHECK-I686-NEXT: addl $24, %esp 390; CHECK-I686-NEXT: popl %esi 391; CHECK-I686-NEXT: retl 392 %r = uitofp i64 %a to half 393 store half %r, ptr %p 394 ret void 395} 396 397define <4 x float> @test_extend32_vec4(ptr %p) #0 { 398; 
CHECK-LIBCALL-LABEL: test_extend32_vec4: 399; CHECK-LIBCALL: # %bb.0: 400; CHECK-LIBCALL-NEXT: subq $72, %rsp 401; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 402; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 403; CHECK-LIBCALL-NEXT: pinsrw $0, 2(%rdi), %xmm0 404; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 405; CHECK-LIBCALL-NEXT: pinsrw $0, 4(%rdi), %xmm0 406; CHECK-LIBCALL-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill 407; CHECK-LIBCALL-NEXT: pinsrw $0, 6(%rdi), %xmm0 408; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 409; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 410; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 411; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 412; CHECK-LIBCALL-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 413; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 414; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 415; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 416; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 417; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 418; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 419; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 420; CHECK-LIBCALL-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 421; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 422; CHECK-LIBCALL-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 423; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0] 424; CHECK-LIBCALL-NEXT: addq $72, %rsp 425; CHECK-LIBCALL-NEXT: retq 426; 427; BWON-F16C-LABEL: test_extend32_vec4: 428; BWON-F16C: # %bb.0: 429; BWON-F16C-NEXT: vcvtph2ps (%rdi), %xmm0 430; BWON-F16C-NEXT: retq 431; 432; CHECK-I686-LABEL: test_extend32_vec4: 433; CHECK-I686: # %bb.0: 434; CHECK-I686-NEXT: pushl %esi 435; CHECK-I686-NEXT: subl $88, %esp 436; CHECK-I686-NEXT: movl 
{{[0-9]+}}(%esp), %eax 437; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 438; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 439; CHECK-I686-NEXT: pinsrw $0, 6(%eax), %xmm0 440; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 441; CHECK-I686-NEXT: pinsrw $0, 4(%eax), %xmm0 442; CHECK-I686-NEXT: pinsrw $0, 2(%eax), %xmm1 443; CHECK-I686-NEXT: pextrw $0, %xmm1, %eax 444; CHECK-I686-NEXT: movw %ax, (%esp) 445; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 446; CHECK-I686-NEXT: calll __extendhfsf2 447; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 448; CHECK-I686-NEXT: movw %si, (%esp) 449; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 450; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 451; CHECK-I686-NEXT: calll __extendhfsf2 452; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 453; CHECK-I686-NEXT: movw %si, (%esp) 454; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 455; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 456; CHECK-I686-NEXT: calll __extendhfsf2 457; CHECK-I686-NEXT: movw %si, (%esp) 458; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 459; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 460; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 461; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 462; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 463; CHECK-I686-NEXT: calll __extendhfsf2 464; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 465; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 466; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 467; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 468; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 469; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 470; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 471; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 
472; CHECK-I686-NEXT: addl $88, %esp 473; CHECK-I686-NEXT: popl %esi 474; CHECK-I686-NEXT: retl 475 %a = load <4 x half>, ptr %p, align 8 476 %b = fpext <4 x half> %a to <4 x float> 477 ret <4 x float> %b 478} 479 480define <4 x double> @test_extend64_vec4(ptr %p) #0 { 481; CHECK-LIBCALL-LABEL: test_extend64_vec4: 482; CHECK-LIBCALL: # %bb.0: 483; CHECK-LIBCALL-NEXT: subq $72, %rsp 484; CHECK-LIBCALL-NEXT: pinsrw $0, 4(%rdi), %xmm0 485; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 486; CHECK-LIBCALL-NEXT: pinsrw $0, 6(%rdi), %xmm0 487; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 488; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 489; CHECK-LIBCALL-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill 490; CHECK-LIBCALL-NEXT: pinsrw $0, 2(%rdi), %xmm0 491; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 492; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 493; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 494; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 495; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 496; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 497; CHECK-LIBCALL-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 498; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0] 499; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 500; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 501; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 502; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 503; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 504; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 505; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 506; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm1 507; CHECK-LIBCALL-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 508; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0],mem[0] 509; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 510; 
CHECK-LIBCALL-NEXT: addq $72, %rsp 511; CHECK-LIBCALL-NEXT: retq 512; 513; BWON-F16C-LABEL: test_extend64_vec4: 514; BWON-F16C: # %bb.0: 515; BWON-F16C-NEXT: vcvtph2ps (%rdi), %xmm0 516; BWON-F16C-NEXT: vcvtps2pd %xmm0, %ymm0 517; BWON-F16C-NEXT: retq 518; 519; CHECK-I686-LABEL: test_extend64_vec4: 520; CHECK-I686: # %bb.0: 521; CHECK-I686-NEXT: pushl %esi 522; CHECK-I686-NEXT: subl $104, %esp 523; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 524; CHECK-I686-NEXT: pinsrw $0, 6(%eax), %xmm0 525; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 526; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 527; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 528; CHECK-I686-NEXT: pinsrw $0, 2(%eax), %xmm0 529; CHECK-I686-NEXT: pinsrw $0, 4(%eax), %xmm1 530; CHECK-I686-NEXT: pextrw $0, %xmm1, %eax 531; CHECK-I686-NEXT: movw %ax, (%esp) 532; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 533; CHECK-I686-NEXT: calll __extendhfsf2 534; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 535; CHECK-I686-NEXT: movw %si, (%esp) 536; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 537; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 538; CHECK-I686-NEXT: calll __extendhfsf2 539; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 540; CHECK-I686-NEXT: movw %si, (%esp) 541; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 542; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 543; CHECK-I686-NEXT: calll __extendhfsf2 544; CHECK-I686-NEXT: movw %si, (%esp) 545; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) 546; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 547; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) 548; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 549; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) 550; CHECK-I686-NEXT: calll __extendhfsf2 551; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) 552; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 553; 
CHECK-I686-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] 554; CHECK-I686-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 555; CHECK-I686-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] 556; CHECK-I686-NEXT: addl $104, %esp 557; CHECK-I686-NEXT: popl %esi 558; CHECK-I686-NEXT: retl 559 %a = load <4 x half>, ptr %p, align 8 560 %b = fpext <4 x half> %a to <4 x double> 561 ret <4 x double> %b 562} 563 564define void @test_trunc32_vec4(<4 x float> %a, ptr %p) #0 { 565; CHECK-LIBCALL-LABEL: test_trunc32_vec4: 566; CHECK-LIBCALL: # %bb.0: 567; CHECK-LIBCALL-NEXT: pushq %rbx 568; CHECK-LIBCALL-NEXT: subq $64, %rsp 569; CHECK-LIBCALL-NEXT: movq %rdi, %rbx 570; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 571; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 572; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 573; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 574; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 575; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 576; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 577; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 578; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 579; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 580; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 581; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 582; CHECK-LIBCALL-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 583; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 584; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 585; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 586; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 587; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 588; CHECK-LIBCALL-NEXT: movw %ax, 6(%rbx) 589; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 590; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 591; CHECK-LIBCALL-NEXT: movw %ax, 4(%rbx) 592; CHECK-LIBCALL-NEXT: movdqa 
{{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 593; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 594; CHECK-LIBCALL-NEXT: movw %ax, 2(%rbx) 595; CHECK-LIBCALL-NEXT: addq $64, %rsp 596; CHECK-LIBCALL-NEXT: popq %rbx 597; CHECK-LIBCALL-NEXT: retq 598; 599; BWON-F16C-LABEL: test_trunc32_vec4: 600; BWON-F16C: # %bb.0: 601; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, (%rdi) 602; BWON-F16C-NEXT: retq 603; 604; CHECK-I686-LABEL: test_trunc32_vec4: 605; CHECK-I686: # %bb.0: 606; CHECK-I686-NEXT: pushl %esi 607; CHECK-I686-NEXT: subl $88, %esp 608; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 609; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 610; CHECK-I686-NEXT: movaps %xmm0, %xmm1 611; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] 612; CHECK-I686-NEXT: movss %xmm1, (%esp) 613; CHECK-I686-NEXT: calll __truncsfhf2 614; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 615; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 616; CHECK-I686-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 617; CHECK-I686-NEXT: movss %xmm0, (%esp) 618; CHECK-I686-NEXT: calll __truncsfhf2 619; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 620; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 621; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 622; CHECK-I686-NEXT: movss %xmm0, (%esp) 623; CHECK-I686-NEXT: calll __truncsfhf2 624; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 625; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 626; CHECK-I686-NEXT: movd %xmm0, (%esp) 627; CHECK-I686-NEXT: calll __truncsfhf2 628; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 629; CHECK-I686-NEXT: movw %ax, (%esi) 630; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 631; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 632; CHECK-I686-NEXT: movw %ax, 6(%esi) 633; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte 
Reload 634; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 635; CHECK-I686-NEXT: movw %ax, 4(%esi) 636; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 637; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 638; CHECK-I686-NEXT: movw %ax, 2(%esi) 639; CHECK-I686-NEXT: addl $88, %esp 640; CHECK-I686-NEXT: popl %esi 641; CHECK-I686-NEXT: retl 642 %v = fptrunc <4 x float> %a to <4 x half> 643 store <4 x half> %v, ptr %p 644 ret void 645} 646 647define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 { 648; CHECK-LIBCALL-LABEL: test_trunc64_vec4: 649; CHECK-LIBCALL: # %bb.0: 650; CHECK-LIBCALL-NEXT: pushq %rbx 651; CHECK-LIBCALL-NEXT: subq $64, %rsp 652; CHECK-LIBCALL-NEXT: movq %rdi, %rbx 653; CHECK-LIBCALL-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 654; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 655; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 656; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 657; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 658; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 659; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 660; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 661; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 662; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 663; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 664; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 665; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 666; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 667; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 668; CHECK-LIBCALL-NEXT: movw %ax, 4(%rbx) 669; CHECK-LIBCALL-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 670; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 671; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 672; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 673; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 674; 
CHECK-LIBCALL-NEXT: movw %ax, 6(%rbx) 675; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 676; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 677; CHECK-LIBCALL-NEXT: movw %ax, 2(%rbx) 678; CHECK-LIBCALL-NEXT: addq $64, %rsp 679; CHECK-LIBCALL-NEXT: popq %rbx 680; CHECK-LIBCALL-NEXT: retq 681; 682; BWON-F16C-LABEL: test_trunc64_vec4: 683; BWON-F16C: # %bb.0: 684; BWON-F16C-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 685; BWON-F16C-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm1 686; BWON-F16C-NEXT: vcvtps2ph $4, %xmm1, %xmm1 687; BWON-F16C-NEXT: vmovd %xmm1, %eax 688; BWON-F16C-NEXT: vextractf128 $1, %ymm0, %xmm1 689; BWON-F16C-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 690; BWON-F16C-NEXT: vcvtsd2ss %xmm2, %xmm2, %xmm2 691; BWON-F16C-NEXT: vcvtps2ph $4, %xmm2, %xmm2 692; BWON-F16C-NEXT: vmovd %xmm2, %ecx 693; BWON-F16C-NEXT: vcvtsd2ss %xmm0, %xmm0, %xmm0 694; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 695; BWON-F16C-NEXT: vmovd %xmm0, %edx 696; BWON-F16C-NEXT: vcvtsd2ss %xmm1, %xmm1, %xmm0 697; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 698; BWON-F16C-NEXT: vmovd %xmm0, %esi 699; BWON-F16C-NEXT: movw %si, 4(%rdi) 700; BWON-F16C-NEXT: movw %dx, (%rdi) 701; BWON-F16C-NEXT: movw %cx, 6(%rdi) 702; BWON-F16C-NEXT: movw %ax, 2(%rdi) 703; BWON-F16C-NEXT: vzeroupper 704; BWON-F16C-NEXT: retq 705; 706; CHECK-I686-LABEL: test_trunc64_vec4: 707; CHECK-I686: # %bb.0: 708; CHECK-I686-NEXT: pushl %esi 709; CHECK-I686-NEXT: subl $88, %esp 710; CHECK-I686-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 711; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 712; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 713; CHECK-I686-NEXT: movlps %xmm0, (%esp) 714; CHECK-I686-NEXT: calll __truncdfhf2 715; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 716; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 717; CHECK-I686-NEXT: movhps %xmm0, (%esp) 718; CHECK-I686-NEXT: calll __truncdfhf2 719; 
CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 720; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 721; CHECK-I686-NEXT: movlps %xmm0, (%esp) 722; CHECK-I686-NEXT: calll __truncdfhf2 723; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 724; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 725; CHECK-I686-NEXT: movhps %xmm0, (%esp) 726; CHECK-I686-NEXT: calll __truncdfhf2 727; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 728; CHECK-I686-NEXT: movw %ax, 6(%esi) 729; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 730; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 731; CHECK-I686-NEXT: movw %ax, 4(%esi) 732; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 733; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 734; CHECK-I686-NEXT: movw %ax, 2(%esi) 735; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 736; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 737; CHECK-I686-NEXT: movw %ax, (%esi) 738; CHECK-I686-NEXT: addl $88, %esp 739; CHECK-I686-NEXT: popl %esi 740; CHECK-I686-NEXT: retl 741 %v = fptrunc <4 x double> %a to <4 x half> 742 store <4 x half> %v, ptr %p 743 ret void 744} 745 746declare float @test_floatret(); 747 748; On i686, if SSE2 is available, the return value from test_floatret is loaded 749; to f80 and then rounded to f32. The DAG combiner should not combine this 750; fp_round and the subsequent fptrunc from float to half. 
; Calls @test_floatret and truncates the returned float to half. The expected
; 32-bit x86 output below stores the x87 result as f32 (fstps) before calling
; __truncsfhf2, i.e. the rounding to float and the truncation to half remain
; two separate steps.
define half @test_f80trunc_nodagcombine() #0 {
; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    callq test_floatret@PLT
; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT:    popq %rax
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    pushq %rax
; BWON-F16C-NEXT:    callq test_floatret@PLT
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovd %xmm0, %eax
; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; BWON-F16C-NEXT:    popq %rax
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    calll test_floatret@PLT
; CHECK-I686-NEXT:    fstps (%esp)
; CHECK-I686-NEXT:    calll __truncsfhf2
; CHECK-I686-NEXT:    addl $12, %esp
; CHECK-I686-NEXT:    retl
  %1 = call float @test_floatret()
  %2 = fptrunc float %1 to half
  ret half %2
}




; Converts the i32 %a to half, adds it to the half loaded from %b, and returns
; the sum extended to float. The expected output below rounds to half twice:
; once after the integer conversion and once after the add, each time
; extending back to float before the value is used again.
define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 {
; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    subq $40, %rsp
; CHECK-LIBCALL-NEXT:    pinsrw $0, (%rsi), %xmm0
; CHECK-LIBCALL-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT:    xorps %xmm0, %xmm0
; CHECK-LIBCALL-NEXT:    cvtsi2ss %edi, %xmm0
; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT:    movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT:    movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-LIBCALL-NEXT:    # xmm0 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-LIBCALL-NEXT:    callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT:    addq $40, %rsp
; CHECK-LIBCALL-NEXT:    jmp __extendhfsf2@PLT # TAILCALL
;
; BWON-F16C-LABEL: test_sitofp_fadd_i32:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    movzwl (%rsi), %eax
; BWON-F16C-NEXT:    vcvtsi2ss %edi, %xmm0, %xmm0
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovd %eax, %xmm1
; BWON-F16C-NEXT:    vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT:    vaddss %xmm0, %xmm1, %xmm0
; BWON-F16C-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT:    vmovd %xmm0, %eax
; BWON-F16C-NEXT:    movzwl %ax, %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: test_sitofp_fadd_i32:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $60, %esp
; CHECK-I686-NEXT:    movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT:    pinsrw $0, (%eax), %xmm0
; CHECK-I686-NEXT:    movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    xorps %xmm0, %xmm0
; CHECK-I686-NEXT:    cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncsfhf2
; CHECK-I686-NEXT:    movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, (%esp)
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, (%esp)
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT:    movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT:    addss {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT:    movss %xmm0, (%esp)
; CHECK-I686-NEXT:    calll __truncsfhf2
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, (%esp)
; CHECK-I686-NEXT:    calll __extendhfsf2
; CHECK-I686-NEXT:    addl $60, %esp
; CHECK-I686-NEXT:    retl
  %tmp0 = load half, ptr %b
  %tmp1 = sitofp i32 %a to half
  %tmp2 = fadd half %tmp0, %tmp1
  %tmp3 = fpext half %tmp2 to float
  ret float %tmp3
}

define half @PR40273(half) #0 {
; CHECK-LIBCALL-LABEL: PR40273:
; CHECK-LIBCALL:       # %bb.0:
; CHECK-LIBCALL-NEXT:    pushq %rax
; CHECK-LIBCALL-NEXT:    callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT:    xorl %eax, %eax
; CHECK-LIBCALL-NEXT:    xorps %xmm1, %xmm1
; CHECK-LIBCALL-NEXT:    ucomiss %xmm1, %xmm0
; CHECK-LIBCALL-NEXT:    movl $15360, %ecx # imm = 0x3C00
; CHECK-LIBCALL-NEXT:    cmovnel %ecx, %eax
; CHECK-LIBCALL-NEXT:    cmovpl %ecx, %eax
; CHECK-LIBCALL-NEXT:    pinsrw $0, %eax, %xmm0
; CHECK-LIBCALL-NEXT:    popq %rax
; CHECK-LIBCALL-NEXT:    retq
;
; BWON-F16C-LABEL: PR40273:
; BWON-F16C:       # %bb.0:
; BWON-F16C-NEXT:    vpextrw $0, %xmm0, %eax
; BWON-F16C-NEXT:    movzwl %ax, %eax
; BWON-F16C-NEXT:    vmovd %eax, %xmm0
; BWON-F16C-NEXT:    vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT:    xorl %eax, %eax
; BWON-F16C-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; BWON-F16C-NEXT:    vucomiss %xmm1, %xmm0
; BWON-F16C-NEXT:    movl $15360, %ecx # imm = 0x3C00
; BWON-F16C-NEXT:    cmovnel %ecx, %eax
; BWON-F16C-NEXT:    cmovpl %ecx, %eax
; BWON-F16C-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0
; BWON-F16C-NEXT:    retq
;
; CHECK-I686-LABEL: PR40273:
; CHECK-I686:       # %bb.0:
; CHECK-I686-NEXT:    subl $12, %esp
; CHECK-I686-NEXT:    pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT:    pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT:    movw %ax, (%esp)
; CHECK-I686-NEXT:    calll __extendhfsf2
898; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 899; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 900; CHECK-I686-NEXT: xorl %eax, %eax 901; CHECK-I686-NEXT: xorps %xmm1, %xmm1 902; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0 903; CHECK-I686-NEXT: movl $15360, %ecx # imm = 0x3C00 904; CHECK-I686-NEXT: cmovnel %ecx, %eax 905; CHECK-I686-NEXT: cmovpl %ecx, %eax 906; CHECK-I686-NEXT: pinsrw $0, %eax, %xmm0 907; CHECK-I686-NEXT: addl $12, %esp 908; CHECK-I686-NEXT: retl 909 %2 = fcmp une half %0, 0xH0000 910 %3 = uitofp i1 %2 to half 911 ret half %3 912} 913 914define dso_local void @brcond(half %0) { 915; CHECK-LIBCALL-LABEL: brcond: 916; CHECK-LIBCALL: # %bb.0: # %entry 917; CHECK-LIBCALL-NEXT: pushq %rax 918; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16 919; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 920; CHECK-LIBCALL-NEXT: xorps %xmm1, %xmm1 921; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0 922; CHECK-LIBCALL-NEXT: setp %al 923; CHECK-LIBCALL-NEXT: setne %cl 924; CHECK-LIBCALL-NEXT: orb %al, %cl 925; CHECK-LIBCALL-NEXT: jne .LBB18_2 926; CHECK-LIBCALL-NEXT: # %bb.1: # %if.then 927; CHECK-LIBCALL-NEXT: popq %rax 928; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 8 929; CHECK-LIBCALL-NEXT: retq 930; CHECK-LIBCALL-NEXT: .LBB18_2: # %if.end 931; 932; BWON-F16C-LABEL: brcond: 933; BWON-F16C: # %bb.0: # %entry 934; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax 935; BWON-F16C-NEXT: movzwl %ax, %eax 936; BWON-F16C-NEXT: vmovd %eax, %xmm0 937; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 938; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1 939; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0 940; BWON-F16C-NEXT: setp %al 941; BWON-F16C-NEXT: setne %cl 942; BWON-F16C-NEXT: orb %al, %cl 943; BWON-F16C-NEXT: jne .LBB18_2 944; BWON-F16C-NEXT: # %bb.1: # %if.then 945; BWON-F16C-NEXT: retq 946; BWON-F16C-NEXT: .LBB18_2: # %if.end 947; 948; CHECK-I686-LABEL: brcond: 949; CHECK-I686: # %bb.0: # %entry 950; CHECK-I686-NEXT: subl $12, %esp 951; CHECK-I686-NEXT: .cfi_def_cfa_offset 16 952; CHECK-I686-NEXT: pinsrw 
$0, {{[0-9]+}}(%esp), %xmm0 953; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 954; CHECK-I686-NEXT: movw %ax, (%esp) 955; CHECK-I686-NEXT: calll __extendhfsf2 956; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 957; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 958; CHECK-I686-NEXT: xorps %xmm1, %xmm1 959; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0 960; CHECK-I686-NEXT: setp %al 961; CHECK-I686-NEXT: setne %cl 962; CHECK-I686-NEXT: orb %al, %cl 963; CHECK-I686-NEXT: jne .LBB18_2 964; CHECK-I686-NEXT: # %bb.1: # %if.then 965; CHECK-I686-NEXT: addl $12, %esp 966; CHECK-I686-NEXT: .cfi_def_cfa_offset 4 967; CHECK-I686-NEXT: retl 968; CHECK-I686-NEXT: .LBB18_2: # %if.end 969entry: 970 %cmp = fcmp oeq half 0xH0000, %0 971 br i1 %cmp, label %if.then, label %if.end 972 973if.then: ; preds = %entry 974 ret void 975 976if.end: ; preds = %entry 977 unreachable 978} 979 980define half @test_sqrt(half %0) { 981; CHECK-LIBCALL-LABEL: test_sqrt: 982; CHECK-LIBCALL: # %bb.0: # %entry 983; CHECK-LIBCALL-NEXT: pushq %rax 984; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16 985; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 986; CHECK-LIBCALL-NEXT: sqrtss %xmm0, %xmm0 987; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 988; CHECK-LIBCALL-NEXT: popq %rax 989; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 8 990; CHECK-LIBCALL-NEXT: retq 991; 992; BWON-F16C-LABEL: test_sqrt: 993; BWON-F16C: # %bb.0: # %entry 994; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax 995; BWON-F16C-NEXT: movzwl %ax, %eax 996; BWON-F16C-NEXT: vmovd %eax, %xmm0 997; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 998; BWON-F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0 999; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 1000; BWON-F16C-NEXT: vmovd %xmm0, %eax 1001; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0 1002; BWON-F16C-NEXT: retq 1003; 1004; CHECK-I686-LABEL: test_sqrt: 1005; CHECK-I686: # %bb.0: # %entry 1006; CHECK-I686-NEXT: subl $12, %esp 1007; CHECK-I686-NEXT: .cfi_def_cfa_offset 16 1008; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0 1009; 
CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 1010; CHECK-I686-NEXT: movw %ax, (%esp) 1011; CHECK-I686-NEXT: calll __extendhfsf2 1012; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1013; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1014; CHECK-I686-NEXT: sqrtss %xmm0, %xmm0 1015; CHECK-I686-NEXT: movss %xmm0, (%esp) 1016; CHECK-I686-NEXT: calll __truncsfhf2 1017; CHECK-I686-NEXT: addl $12, %esp 1018; CHECK-I686-NEXT: .cfi_def_cfa_offset 4 1019; CHECK-I686-NEXT: retl 1020entry: 1021 %1 = call half @llvm.sqrt.f16(half %0) 1022 ret half %1 1023} 1024 1025declare half @llvm.sqrt.f16(half) 1026 1027define void @main.158() local_unnamed_addr #0 { 1028; CHECK-LIBCALL-LABEL: main.158: 1029; CHECK-LIBCALL: # %bb.0: # %entry 1030; CHECK-LIBCALL-NEXT: pushq %rax 1031; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0 1032; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1033; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1034; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1035; CHECK-LIBCALL-NEXT: ucomiss %xmm0, %xmm1 1036; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0 1037; CHECK-LIBCALL-NEXT: jae .LBB20_2 1038; CHECK-LIBCALL-NEXT: # %bb.1: # %entry 1039; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1040; CHECK-LIBCALL-NEXT: .LBB20_2: # %entry 1041; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 1042; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 1043; CHECK-LIBCALL-NEXT: movw %ax, (%rax) 1044; CHECK-LIBCALL-NEXT: popq %rax 1045; CHECK-LIBCALL-NEXT: retq 1046; 1047; BWON-F16C-LABEL: main.158: 1048; BWON-F16C: # %bb.0: # %entry 1049; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0 1050; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 1051; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1052; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 1053; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero 1054; BWON-F16C-NEXT: vucomiss %xmm0, %xmm1 1055; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0 1056; BWON-F16C-NEXT: jae .LBB20_2 1057; 
BWON-F16C-NEXT: # %bb.1: # %entry 1058; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1059; BWON-F16C-NEXT: .LBB20_2: # %entry 1060; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 1061; BWON-F16C-NEXT: vmovd %xmm0, %eax 1062; BWON-F16C-NEXT: movw %ax, (%rax) 1063; BWON-F16C-NEXT: retq 1064; 1065; CHECK-I686-LABEL: main.158: 1066; CHECK-I686: # %bb.0: # %entry 1067; CHECK-I686-NEXT: subl $12, %esp 1068; CHECK-I686-NEXT: pxor %xmm0, %xmm0 1069; CHECK-I686-NEXT: movd %xmm0, (%esp) 1070; CHECK-I686-NEXT: calll __truncsfhf2 1071; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 1072; CHECK-I686-NEXT: movw %ax, (%esp) 1073; CHECK-I686-NEXT: calll __extendhfsf2 1074; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1075; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1076; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0 1077; CHECK-I686-NEXT: xorps %xmm0, %xmm0 1078; CHECK-I686-NEXT: jae .LBB20_2 1079; CHECK-I686-NEXT: # %bb.1: # %entry 1080; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 1081; CHECK-I686-NEXT: .LBB20_2: # %entry 1082; CHECK-I686-NEXT: movss %xmm0, (%esp) 1083; CHECK-I686-NEXT: calll __truncsfhf2 1084; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 1085; CHECK-I686-NEXT: movw %ax, (%eax) 1086; CHECK-I686-NEXT: addl $12, %esp 1087; CHECK-I686-NEXT: retl 1088entry: 1089 %0 = tail call half @llvm.fabs.f16(half undef) 1090 %1 = fpext half %0 to float 1091 %compare.2 = fcmp ole half %0, 0xH4800 1092 %multiply.95 = fmul float %1, 5.000000e-01 1093 %add.82 = fadd float %multiply.95, -2.000000e+00 1094 %multiply.68 = fmul float %add.82, 0.000000e+00 1095 %subtract.65 = fsub float %multiply.68, 0.000000e+00 1096 %multiply.57 = fmul float undef, 0.000000e+00 1097 %2 = select i1 %compare.2, float 0.000000e+00, float %multiply.57 1098 %3 = fptrunc float %2 to half 1099 store half %3, ptr undef, align 2 1100 ret void 1101} 1102 1103define void @main.45() local_unnamed_addr { 1104; CHECK-LIBCALL-LABEL: main.45: 1105; CHECK-LIBCALL: # %bb.0: # %entry 
1106; CHECK-LIBCALL-NEXT: pushq %rbp 1107; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16 1108; CHECK-LIBCALL-NEXT: pushq %r15 1109; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 24 1110; CHECK-LIBCALL-NEXT: pushq %r14 1111; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 32 1112; CHECK-LIBCALL-NEXT: pushq %rbx 1113; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 40 1114; CHECK-LIBCALL-NEXT: pushq %rax 1115; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 48 1116; CHECK-LIBCALL-NEXT: .cfi_offset %rbx, -40 1117; CHECK-LIBCALL-NEXT: .cfi_offset %r14, -32 1118; CHECK-LIBCALL-NEXT: .cfi_offset %r15, -24 1119; CHECK-LIBCALL-NEXT: .cfi_offset %rbp, -16 1120; CHECK-LIBCALL-NEXT: pinsrw $0, (%rax), %xmm0 1121; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 1122; CHECK-LIBCALL-NEXT: movd %eax, %xmm1 1123; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7] 1124; CHECK-LIBCALL-NEXT: movq %xmm1, %rbx 1125; CHECK-LIBCALL-NEXT: movq %rbx, %r14 1126; CHECK-LIBCALL-NEXT: shrq $48, %r14 1127; CHECK-LIBCALL-NEXT: movq %rbx, %r15 1128; CHECK-LIBCALL-NEXT: shrq $32, %r15 1129; CHECK-LIBCALL-NEXT: movl %ebx, %ebp 1130; CHECK-LIBCALL-NEXT: shrl $16, %ebp 1131; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 1132; CHECK-LIBCALL-NEXT: ucomiss %xmm0, %xmm0 1133; CHECK-LIBCALL-NEXT: movl $32256, %eax # imm = 0x7E00 1134; CHECK-LIBCALL-NEXT: cmovpl %eax, %ebp 1135; CHECK-LIBCALL-NEXT: cmovpl %eax, %r15d 1136; CHECK-LIBCALL-NEXT: cmovpl %eax, %r14d 1137; CHECK-LIBCALL-NEXT: cmovpl %eax, %ebx 1138; CHECK-LIBCALL-NEXT: movw %bx, (%rax) 1139; CHECK-LIBCALL-NEXT: movw %r14w, (%rax) 1140; CHECK-LIBCALL-NEXT: movw %r15w, (%rax) 1141; CHECK-LIBCALL-NEXT: movw %bp, (%rax) 1142; CHECK-LIBCALL-NEXT: addq $8, %rsp 1143; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 40 1144; CHECK-LIBCALL-NEXT: popq %rbx 1145; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 32 1146; CHECK-LIBCALL-NEXT: popq %r14 1147; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 24 1148; CHECK-LIBCALL-NEXT: popq %r15 1149; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16 1150; 
CHECK-LIBCALL-NEXT: popq %rbp 1151; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 8 1152; CHECK-LIBCALL-NEXT: retq 1153; 1154; BWON-F16C-LABEL: main.45: 1155; BWON-F16C: # %bb.0: # %entry 1156; BWON-F16C-NEXT: movzwl (%rax), %eax 1157; BWON-F16C-NEXT: vmovd %eax, %xmm0 1158; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7] 1159; BWON-F16C-NEXT: vmovq %xmm1, %rax 1160; BWON-F16C-NEXT: movq %rax, %rcx 1161; BWON-F16C-NEXT: shrq $48, %rcx 1162; BWON-F16C-NEXT: movq %rax, %rdx 1163; BWON-F16C-NEXT: shrq $32, %rdx 1164; BWON-F16C-NEXT: movl %eax, %esi 1165; BWON-F16C-NEXT: shrl $16, %esi 1166; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 1167; BWON-F16C-NEXT: vucomiss %xmm0, %xmm0 1168; BWON-F16C-NEXT: movl $32256, %edi # imm = 0x7E00 1169; BWON-F16C-NEXT: cmovpl %edi, %esi 1170; BWON-F16C-NEXT: cmovpl %edi, %edx 1171; BWON-F16C-NEXT: cmovpl %edi, %ecx 1172; BWON-F16C-NEXT: cmovpl %edi, %eax 1173; BWON-F16C-NEXT: movw %ax, (%rax) 1174; BWON-F16C-NEXT: movw %cx, (%rax) 1175; BWON-F16C-NEXT: movw %dx, (%rax) 1176; BWON-F16C-NEXT: movw %si, (%rax) 1177; BWON-F16C-NEXT: retq 1178; 1179; CHECK-I686-LABEL: main.45: 1180; CHECK-I686: # %bb.0: # %entry 1181; CHECK-I686-NEXT: pushl %edi 1182; CHECK-I686-NEXT: .cfi_def_cfa_offset 8 1183; CHECK-I686-NEXT: pushl %esi 1184; CHECK-I686-NEXT: .cfi_def_cfa_offset 12 1185; CHECK-I686-NEXT: subl $20, %esp 1186; CHECK-I686-NEXT: .cfi_def_cfa_offset 32 1187; CHECK-I686-NEXT: .cfi_offset %esi, -12 1188; CHECK-I686-NEXT: .cfi_offset %edi, -8 1189; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 1190; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 1191; CHECK-I686-NEXT: movd %eax, %xmm0 1192; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 1193; CHECK-I686-NEXT: movd %xmm0, %esi 1194; CHECK-I686-NEXT: movl %esi, %edi 1195; CHECK-I686-NEXT: shrl $16, %edi 1196; CHECK-I686-NEXT: movw %ax, (%esp) 1197; CHECK-I686-NEXT: calll __extendhfsf2 1198; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 1199; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = 
mem[0],zero,zero,zero 1200; CHECK-I686-NEXT: ucomiss %xmm0, %xmm0 1201; CHECK-I686-NEXT: movl $32256, %eax # imm = 0x7E00 1202; CHECK-I686-NEXT: cmovpl %eax, %esi 1203; CHECK-I686-NEXT: cmovpl %eax, %edi 1204; CHECK-I686-NEXT: movw %di, (%eax) 1205; CHECK-I686-NEXT: movw %si, (%eax) 1206; CHECK-I686-NEXT: addl $20, %esp 1207; CHECK-I686-NEXT: .cfi_def_cfa_offset 12 1208; CHECK-I686-NEXT: popl %esi 1209; CHECK-I686-NEXT: .cfi_def_cfa_offset 8 1210; CHECK-I686-NEXT: popl %edi 1211; CHECK-I686-NEXT: .cfi_def_cfa_offset 4 1212; CHECK-I686-NEXT: retl 1213entry: 1214 %0 = load half, ptr undef, align 8 1215 %1 = bitcast half %0 to i16 1216 %broadcast.splatinsert = insertelement <4 x half> poison, half %0, i64 0 1217 %broadcast.splat = shufflevector <4 x half> %broadcast.splatinsert, <4 x half> poison, <4 x i32> zeroinitializer 1218 %broadcast.splatinsert13 = insertelement <4 x i16> poison, i16 %1, i64 0 1219 %broadcast.splat14 = shufflevector <4 x i16> %broadcast.splatinsert13, <4 x i16> poison, <4 x i32> zeroinitializer 1220 %2 = fcmp uno <4 x half> %broadcast.splat, zeroinitializer 1221 %3 = add <4 x i16> zeroinitializer, %broadcast.splat14 1222 %4 = select i1 undef, <4 x i16> undef, <4 x i16> %3 1223 %5 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> %4 1224 %6 = bitcast <4 x i16> %5 to <4 x half> 1225 %7 = select <4 x i1> %2, <4 x half> <half 0xH7E00, half 0xH7E00, half 0xH7E00, half 0xH7E00>, <4 x half> %6 1226 store <4 x half> %7, ptr undef, align 16 1227 ret void 1228} 1229 1230define half @fcopysign(half %x, half %y) { 1231; CHECK-LIBCALL-LABEL: fcopysign: 1232; CHECK-LIBCALL: # %bb.0: 1233; CHECK-LIBCALL-NEXT: pextrw $0, %xmm1, %eax 1234; CHECK-LIBCALL-NEXT: andl $-32768, %eax # imm = 0x8000 1235; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %ecx 1236; CHECK-LIBCALL-NEXT: andl $32767, %ecx # imm = 0x7FFF 1237; CHECK-LIBCALL-NEXT: orl %eax, %ecx 1238; CHECK-LIBCALL-NEXT: pinsrw $0, %ecx, %xmm0 1239; CHECK-LIBCALL-NEXT: retq 1240; 1241; BWON-F16C-LABEL: fcopysign: 
1242; BWON-F16C: # %bb.0: 1243; BWON-F16C-NEXT: vpextrw $0, %xmm1, %eax 1244; BWON-F16C-NEXT: andl $-32768, %eax # imm = 0x8000 1245; BWON-F16C-NEXT: vpextrw $0, %xmm0, %ecx 1246; BWON-F16C-NEXT: andl $32767, %ecx # imm = 0x7FFF 1247; BWON-F16C-NEXT: orl %eax, %ecx 1248; BWON-F16C-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0 1249; BWON-F16C-NEXT: retq 1250; 1251; CHECK-I686-LABEL: fcopysign: 1252; CHECK-I686: # %bb.0: 1253; CHECK-I686-NEXT: movl $-32768, %eax # imm = 0x8000 1254; CHECK-I686-NEXT: andl {{[0-9]+}}(%esp), %eax 1255; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx 1256; CHECK-I686-NEXT: andl $32767, %ecx # imm = 0x7FFF 1257; CHECK-I686-NEXT: orl %eax, %ecx 1258; CHECK-I686-NEXT: pinsrw $0, %ecx, %xmm0 1259; CHECK-I686-NEXT: retl 1260 %a = call half @llvm.copysign.f16(half %x, half %y) 1261 ret half %a 1262} 1263 1264declare half @llvm.fabs.f16(half) 1265declare half @llvm.copysign.f16(half, half) 1266 1267attributes #0 = { nounwind } 1268