1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=1 \ 3; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWON 4; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=-f16c -fixup-byte-word-insts=0 \ 5; RUN: | FileCheck %s -check-prefixes=CHECK,CHECK-LIBCALL,BWOFF 6; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+f16c -fixup-byte-word-insts=1 \ 7; RUN: | FileCheck %s -check-prefixes=CHECK,BWON,BWON-F16C 8; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr +sse2 -fixup-byte-word-insts=0 \ 9; RUN: | FileCheck %s -check-prefixes=CHECK-I686 10 11define void @test_load_store(ptr %in, ptr %out) #0 { 12; CHECK-LIBCALL-LABEL: test_load_store: 13; CHECK-LIBCALL: # %bb.0: 14; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 15; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 16; CHECK-LIBCALL-NEXT: movw %ax, (%rsi) 17; CHECK-LIBCALL-NEXT: retq 18; 19; BWON-F16C-LABEL: test_load_store: 20; BWON-F16C: # %bb.0: 21; BWON-F16C-NEXT: vpinsrw $0, (%rdi), %xmm0, %xmm0 22; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rsi) 23; BWON-F16C-NEXT: retq 24; 25; CHECK-I686-LABEL: test_load_store: 26; CHECK-I686: # %bb.0: 27; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 28; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx 29; CHECK-I686-NEXT: pinsrw $0, (%ecx), %xmm0 30; CHECK-I686-NEXT: pextrw $0, %xmm0, %ecx 31; CHECK-I686-NEXT: movw %cx, (%eax) 32; CHECK-I686-NEXT: retl 33 %val = load half, ptr %in 34 store half %val, ptr %out 35 ret void 36} 37 38define i16 @test_bitcast_from_half(ptr %addr) #0 { 39; BWON-LABEL: test_bitcast_from_half: 40; BWON: # %bb.0: 41; BWON-NEXT: movzwl (%rdi), %eax 42; BWON-NEXT: retq 43; 44; BWOFF-LABEL: test_bitcast_from_half: 45; BWOFF: # %bb.0: 46; BWOFF-NEXT: movw (%rdi), %ax 47; BWOFF-NEXT: retq 48; 49; CHECK-I686-LABEL: test_bitcast_from_half: 50; CHECK-I686: # %bb.0: 51; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 52; CHECK-I686-NEXT: movw 
(%eax), %ax 53; CHECK-I686-NEXT: retl 54 %val = load half, ptr %addr 55 %val_int = bitcast half %val to i16 56 ret i16 %val_int 57} 58 59define void @test_bitcast_to_half(ptr %addr, i16 %in) #0 { 60; CHECK-LABEL: test_bitcast_to_half: 61; CHECK: # %bb.0: 62; CHECK-NEXT: movw %si, (%rdi) 63; CHECK-NEXT: retq 64; 65; CHECK-I686-LABEL: test_bitcast_to_half: 66; CHECK-I686: # %bb.0: 67; CHECK-I686-NEXT: movw {{[0-9]+}}(%esp), %ax 68; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %ecx 69; CHECK-I686-NEXT: movw %ax, (%ecx) 70; CHECK-I686-NEXT: retl 71 %val_fp = bitcast i16 %in to half 72 store half %val_fp, ptr %addr 73 ret void 74} 75 76define float @test_extend32(ptr %addr) #0 { 77; CHECK-LIBCALL-LABEL: test_extend32: 78; CHECK-LIBCALL: # %bb.0: 79; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 80; CHECK-LIBCALL-NEXT: jmp __extendhfsf2@PLT # TAILCALL 81; 82; BWON-F16C-LABEL: test_extend32: 83; BWON-F16C: # %bb.0: 84; BWON-F16C-NEXT: movzwl (%rdi), %eax 85; BWON-F16C-NEXT: vmovd %eax, %xmm0 86; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 87; BWON-F16C-NEXT: retq 88; 89; CHECK-I686-LABEL: test_extend32: 90; CHECK-I686: # %bb.0: 91; CHECK-I686-NEXT: subl $12, %esp 92; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 93; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 94; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 95; CHECK-I686-NEXT: movw %ax, (%esp) 96; CHECK-I686-NEXT: calll __extendhfsf2 97; CHECK-I686-NEXT: addl $12, %esp 98; CHECK-I686-NEXT: retl 99 %val16 = load half, ptr %addr 100 %val32 = fpext half %val16 to float 101 ret float %val32 102} 103 104define double @test_extend64(ptr %addr) #0 { 105; CHECK-LIBCALL-LABEL: test_extend64: 106; CHECK-LIBCALL: # %bb.0: 107; CHECK-LIBCALL-NEXT: pushq %rax 108; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 109; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 110; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 111; CHECK-LIBCALL-NEXT: popq %rax 112; CHECK-LIBCALL-NEXT: retq 113; 114; BWON-F16C-LABEL: test_extend64: 115; BWON-F16C: # %bb.0: 116; BWON-F16C-NEXT: 
movzwl (%rdi), %eax 117; BWON-F16C-NEXT: vmovd %eax, %xmm0 118; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 119; BWON-F16C-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 120; BWON-F16C-NEXT: retq 121; 122; CHECK-I686-LABEL: test_extend64: 123; CHECK-I686: # %bb.0: 124; CHECK-I686-NEXT: subl $12, %esp 125; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 126; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 127; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 128; CHECK-I686-NEXT: movw %ax, (%esp) 129; CHECK-I686-NEXT: calll __extendhfsf2 130; CHECK-I686-NEXT: addl $12, %esp 131; CHECK-I686-NEXT: retl 132 %val16 = load half, ptr %addr 133 %val32 = fpext half %val16 to double 134 ret double %val32 135} 136 137define void @test_trunc32(float %in, ptr %addr) #0 { 138; CHECK-LIBCALL-LABEL: test_trunc32: 139; CHECK-LIBCALL: # %bb.0: 140; CHECK-LIBCALL-NEXT: pushq %rbx 141; CHECK-LIBCALL-NEXT: movq %rdi, %rbx 142; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 143; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 144; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 145; CHECK-LIBCALL-NEXT: popq %rbx 146; CHECK-LIBCALL-NEXT: retq 147; 148; BWON-F16C-LABEL: test_trunc32: 149; BWON-F16C: # %bb.0: 150; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 151; BWON-F16C-NEXT: vmovd %xmm0, %eax 152; BWON-F16C-NEXT: movw %ax, (%rdi) 153; BWON-F16C-NEXT: retq 154; 155; CHECK-I686-LABEL: test_trunc32: 156; CHECK-I686: # %bb.0: 157; CHECK-I686-NEXT: pushl %esi 158; CHECK-I686-NEXT: subl $8, %esp 159; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 160; CHECK-I686-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 161; CHECK-I686-NEXT: movd %xmm0, (%esp) 162; CHECK-I686-NEXT: calll __truncsfhf2 163; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 164; CHECK-I686-NEXT: movw %ax, (%esi) 165; CHECK-I686-NEXT: addl $8, %esp 166; CHECK-I686-NEXT: popl %esi 167; CHECK-I686-NEXT: retl 168 %val16 = fptrunc float %in to half 169 store half %val16, ptr %addr 170 ret void 171} 172 173define void @test_trunc64(double %in, ptr %addr) #0 { 174; CHECK-LIBCALL-LABEL: 
test_trunc64: 175; CHECK-LIBCALL: # %bb.0: 176; CHECK-LIBCALL-NEXT: pushq %rbx 177; CHECK-LIBCALL-NEXT: movq %rdi, %rbx 178; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 179; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 180; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 181; CHECK-LIBCALL-NEXT: popq %rbx 182; CHECK-LIBCALL-NEXT: retq 183; 184; BWON-F16C-LABEL: test_trunc64: 185; BWON-F16C: # %bb.0: 186; BWON-F16C-NEXT: pushq %rbx 187; BWON-F16C-NEXT: movq %rdi, %rbx 188; BWON-F16C-NEXT: callq __truncdfhf2@PLT 189; BWON-F16C-NEXT: vpextrw $0, %xmm0, (%rbx) 190; BWON-F16C-NEXT: popq %rbx 191; BWON-F16C-NEXT: retq 192; 193; CHECK-I686-LABEL: test_trunc64: 194; CHECK-I686: # %bb.0: 195; CHECK-I686-NEXT: pushl %esi 196; CHECK-I686-NEXT: subl $8, %esp 197; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 198; CHECK-I686-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 199; CHECK-I686-NEXT: movq %xmm0, (%esp) 200; CHECK-I686-NEXT: calll __truncdfhf2 201; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 202; CHECK-I686-NEXT: movw %ax, (%esi) 203; CHECK-I686-NEXT: addl $8, %esp 204; CHECK-I686-NEXT: popl %esi 205; CHECK-I686-NEXT: retl 206 %val16 = fptrunc double %in to half 207 store half %val16, ptr %addr 208 ret void 209} 210 211define i64 @test_fptosi_i64(ptr %p) #0 { 212; CHECK-LIBCALL-LABEL: test_fptosi_i64: 213; CHECK-LIBCALL: # %bb.0: 214; CHECK-LIBCALL-NEXT: pushq %rax 215; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 216; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 217; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax 218; CHECK-LIBCALL-NEXT: popq %rcx 219; CHECK-LIBCALL-NEXT: retq 220; 221; BWON-F16C-LABEL: test_fptosi_i64: 222; BWON-F16C: # %bb.0: 223; BWON-F16C-NEXT: movzwl (%rdi), %eax 224; BWON-F16C-NEXT: vmovd %eax, %xmm0 225; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 226; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax 227; BWON-F16C-NEXT: retq 228; 229; CHECK-I686-LABEL: test_fptosi_i64: 230; CHECK-I686: # %bb.0: 231; CHECK-I686-NEXT: subl $28, %esp 232; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 233; 
CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 234; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 235; CHECK-I686-NEXT: movw %ax, (%esp) 236; CHECK-I686-NEXT: calll __extendhfsf2 237; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 238; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp) 239; CHECK-I686-NEXT: fnstcw {{[0-9]+}}(%esp) 240; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %eax 241; CHECK-I686-NEXT: orl $3072, %eax # imm = 0xC00 242; CHECK-I686-NEXT: movw %ax, {{[0-9]+}}(%esp) 243; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp) 244; CHECK-I686-NEXT: fistpll {{[0-9]+}}(%esp) 245; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp) 246; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 247; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %edx 248; CHECK-I686-NEXT: addl $28, %esp 249; CHECK-I686-NEXT: retl 250 %a = load half, ptr %p, align 2 251 %r = fptosi half %a to i64 252 ret i64 %r 253} 254 255define void @test_sitofp_i64(i64 %a, ptr %p) #0 { 256; CHECK-LIBCALL-LABEL: test_sitofp_i64: 257; CHECK-LIBCALL: # %bb.0: 258; CHECK-LIBCALL-NEXT: pushq %rbx 259; CHECK-LIBCALL-NEXT: movq %rsi, %rbx 260; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0 261; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 262; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 263; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 264; CHECK-LIBCALL-NEXT: popq %rbx 265; CHECK-LIBCALL-NEXT: retq 266; 267; BWON-F16C-LABEL: test_sitofp_i64: 268; BWON-F16C: # %bb.0: 269; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 270; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 271; BWON-F16C-NEXT: vmovd %xmm0, %eax 272; BWON-F16C-NEXT: movw %ax, (%rsi) 273; BWON-F16C-NEXT: retq 274; 275; CHECK-I686-LABEL: test_sitofp_i64: 276; CHECK-I686: # %bb.0: 277; CHECK-I686-NEXT: pushl %esi 278; CHECK-I686-NEXT: subl $24, %esp 279; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 280; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 281; CHECK-I686-NEXT: movlps %xmm0, {{[0-9]+}}(%esp) 282; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp) 283; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 284; CHECK-I686-NEXT: movd {{.*#+}} 
xmm0 = mem[0],zero,zero,zero 285; CHECK-I686-NEXT: movd %xmm0, (%esp) 286; CHECK-I686-NEXT: calll __truncsfhf2 287; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 288; CHECK-I686-NEXT: movw %ax, (%esi) 289; CHECK-I686-NEXT: addl $24, %esp 290; CHECK-I686-NEXT: popl %esi 291; CHECK-I686-NEXT: retl 292 %r = sitofp i64 %a to half 293 store half %r, ptr %p 294 ret void 295} 296 297define i64 @test_fptoui_i64(ptr %p) #0 { 298; CHECK-LIBCALL-LABEL: test_fptoui_i64: 299; CHECK-LIBCALL: # %bb.0: 300; CHECK-LIBCALL-NEXT: pushq %rax 301; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 302; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 303; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rcx 304; CHECK-LIBCALL-NEXT: movq %rcx, %rdx 305; CHECK-LIBCALL-NEXT: sarq $63, %rdx 306; CHECK-LIBCALL-NEXT: subss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0 307; CHECK-LIBCALL-NEXT: cvttss2si %xmm0, %rax 308; CHECK-LIBCALL-NEXT: andq %rdx, %rax 309; CHECK-LIBCALL-NEXT: orq %rcx, %rax 310; CHECK-LIBCALL-NEXT: popq %rcx 311; CHECK-LIBCALL-NEXT: retq 312; 313; BWON-F16C-LABEL: test_fptoui_i64: 314; BWON-F16C: # %bb.0: 315; BWON-F16C-NEXT: movzwl (%rdi), %eax 316; BWON-F16C-NEXT: vmovd %eax, %xmm0 317; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0 318; BWON-F16C-NEXT: vcvttss2si %xmm0, %rcx 319; BWON-F16C-NEXT: movq %rcx, %rdx 320; BWON-F16C-NEXT: sarq $63, %rdx 321; BWON-F16C-NEXT: vsubss {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 322; BWON-F16C-NEXT: vcvttss2si %xmm0, %rax 323; BWON-F16C-NEXT: andq %rdx, %rax 324; BWON-F16C-NEXT: orq %rcx, %rax 325; BWON-F16C-NEXT: retq 326; 327; CHECK-I686-LABEL: test_fptoui_i64: 328; CHECK-I686: # %bb.0: 329; CHECK-I686-NEXT: subl $28, %esp 330; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 331; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 332; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 333; CHECK-I686-NEXT: movw %ax, (%esp) 334; CHECK-I686-NEXT: calll __extendhfsf2 335; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 336; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 337; 
CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 338; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0 339; CHECK-I686-NEXT: jae .LBB9_2 340; CHECK-I686-NEXT: # %bb.1: 341; CHECK-I686-NEXT: xorps %xmm1, %xmm1 342; CHECK-I686-NEXT: .LBB9_2: 343; CHECK-I686-NEXT: subss %xmm1, %xmm0 344; CHECK-I686-NEXT: movss %xmm0, {{[0-9]+}}(%esp) 345; CHECK-I686-NEXT: setae %al 346; CHECK-I686-NEXT: flds {{[0-9]+}}(%esp) 347; CHECK-I686-NEXT: fnstcw {{[0-9]+}}(%esp) 348; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx 349; CHECK-I686-NEXT: orl $3072, %ecx # imm = 0xC00 350; CHECK-I686-NEXT: movw %cx, {{[0-9]+}}(%esp) 351; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp) 352; CHECK-I686-NEXT: fistpll {{[0-9]+}}(%esp) 353; CHECK-I686-NEXT: fldcw {{[0-9]+}}(%esp) 354; CHECK-I686-NEXT: movzbl %al, %edx 355; CHECK-I686-NEXT: shll $31, %edx 356; CHECK-I686-NEXT: xorl {{[0-9]+}}(%esp), %edx 357; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 358; CHECK-I686-NEXT: addl $28, %esp 359; CHECK-I686-NEXT: retl 360 %a = load half, ptr %p, align 2 361 %r = fptoui half %a to i64 362 ret i64 %r 363} 364 365define void @test_uitofp_i64(i64 %a, ptr %p) #0 { 366; CHECK-LIBCALL-LABEL: test_uitofp_i64: 367; CHECK-LIBCALL: # %bb.0: 368; CHECK-LIBCALL-NEXT: pushq %rbx 369; CHECK-LIBCALL-NEXT: movq %rsi, %rbx 370; CHECK-LIBCALL-NEXT: testq %rdi, %rdi 371; CHECK-LIBCALL-NEXT: js .LBB10_1 372; CHECK-LIBCALL-NEXT: # %bb.2: 373; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0 374; CHECK-LIBCALL-NEXT: jmp .LBB10_3 375; CHECK-LIBCALL-NEXT: .LBB10_1: 376; CHECK-LIBCALL-NEXT: movq %rdi, %rax 377; CHECK-LIBCALL-NEXT: shrq %rax 378; CHECK-LIBCALL-NEXT: andl $1, %edi 379; CHECK-LIBCALL-NEXT: orq %rax, %rdi 380; CHECK-LIBCALL-NEXT: cvtsi2ss %rdi, %xmm0 381; CHECK-LIBCALL-NEXT: addss %xmm0, %xmm0 382; CHECK-LIBCALL-NEXT: .LBB10_3: 383; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 384; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 385; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 386; CHECK-LIBCALL-NEXT: popq %rbx 387; CHECK-LIBCALL-NEXT: retq 388; 
389; BWON-F16C-LABEL: test_uitofp_i64: 390; BWON-F16C: # %bb.0: 391; BWON-F16C-NEXT: testq %rdi, %rdi 392; BWON-F16C-NEXT: js .LBB10_1 393; BWON-F16C-NEXT: # %bb.2: 394; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 395; BWON-F16C-NEXT: jmp .LBB10_3 396; BWON-F16C-NEXT: .LBB10_1: 397; BWON-F16C-NEXT: movq %rdi, %rax 398; BWON-F16C-NEXT: shrq %rax 399; BWON-F16C-NEXT: andl $1, %edi 400; BWON-F16C-NEXT: orq %rax, %rdi 401; BWON-F16C-NEXT: vcvtsi2ss %rdi, %xmm0, %xmm0 402; BWON-F16C-NEXT: vaddss %xmm0, %xmm0, %xmm0 403; BWON-F16C-NEXT: .LBB10_3: 404; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0 405; BWON-F16C-NEXT: vmovd %xmm0, %eax 406; BWON-F16C-NEXT: movw %ax, (%rsi) 407; BWON-F16C-NEXT: retq 408; 409; CHECK-I686-LABEL: test_uitofp_i64: 410; CHECK-I686: # %bb.0: 411; CHECK-I686-NEXT: pushl %esi 412; CHECK-I686-NEXT: subl $24, %esp 413; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 414; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 415; CHECK-I686-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 416; CHECK-I686-NEXT: movq %xmm0, {{[0-9]+}}(%esp) 417; CHECK-I686-NEXT: shrl $31, %eax 418; CHECK-I686-NEXT: fildll {{[0-9]+}}(%esp) 419; CHECK-I686-NEXT: fadds {{\.?LCPI[0-9]+_[0-9]+}}(,%eax,4) 420; CHECK-I686-NEXT: fstps (%esp) 421; CHECK-I686-NEXT: calll __truncsfhf2 422; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 423; CHECK-I686-NEXT: movw %ax, (%esi) 424; CHECK-I686-NEXT: addl $24, %esp 425; CHECK-I686-NEXT: popl %esi 426; CHECK-I686-NEXT: retl 427 %r = uitofp i64 %a to half 428 store half %r, ptr %p 429 ret void 430} 431 432define <4 x float> @test_extend32_vec4(ptr %p) #0 { 433; CHECK-LIBCALL-LABEL: test_extend32_vec4: 434; CHECK-LIBCALL: # %bb.0: 435; CHECK-LIBCALL-NEXT: subq $72, %rsp 436; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 437; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 438; CHECK-LIBCALL-NEXT: pinsrw $0, 2(%rdi), %xmm0 439; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 440; CHECK-LIBCALL-NEXT: pinsrw $0, 
4(%rdi), %xmm0 441; CHECK-LIBCALL-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill 442; CHECK-LIBCALL-NEXT: pinsrw $0, 6(%rdi), %xmm0 443; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 444; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 445; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 446; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 447; CHECK-LIBCALL-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 448; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 449; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 450; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 451; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 452; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 453; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 454; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 455; CHECK-LIBCALL-NEXT: unpcklps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 456; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 457; CHECK-LIBCALL-NEXT: unpcklpd (%rsp), %xmm0 # 16-byte Folded Reload 458; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0] 459; CHECK-LIBCALL-NEXT: addq $72, %rsp 460; CHECK-LIBCALL-NEXT: retq 461; 462; BWON-F16C-LABEL: test_extend32_vec4: 463; BWON-F16C: # %bb.0: 464; BWON-F16C-NEXT: vcvtph2ps (%rdi), %xmm0 465; BWON-F16C-NEXT: retq 466; 467; CHECK-I686-LABEL: test_extend32_vec4: 468; CHECK-I686: # %bb.0: 469; CHECK-I686-NEXT: pushl %esi 470; CHECK-I686-NEXT: subl $88, %esp 471; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax 472; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 473; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 474; CHECK-I686-NEXT: pinsrw $0, 6(%eax), %xmm0 475; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 476; CHECK-I686-NEXT: pinsrw $0, 4(%eax), %xmm0 477; CHECK-I686-NEXT: pinsrw $0, 2(%eax), %xmm1 478; CHECK-I686-NEXT: pextrw $0, %xmm1, %eax 
479; CHECK-I686-NEXT: movw %ax, (%esp) 480; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 481; CHECK-I686-NEXT: calll __extendhfsf2 482; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 483; CHECK-I686-NEXT: movw %si, (%esp) 484; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 485; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 486; CHECK-I686-NEXT: calll __extendhfsf2 487; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 488; CHECK-I686-NEXT: movw %si, (%esp) 489; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 490; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 491; CHECK-I686-NEXT: calll __extendhfsf2 492; CHECK-I686-NEXT: movw %si, (%esp) 493; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 494; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 495; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 496; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 497; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 498; CHECK-I686-NEXT: calll __extendhfsf2 499; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp) 500; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 501; CHECK-I686-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero 502; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 503; CHECK-I686-NEXT: movss {{.*#+}} xmm2 = mem[0],zero,zero,zero 504; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero 505; CHECK-I686-NEXT: unpcklps {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 506; CHECK-I686-NEXT: movlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0] 507; CHECK-I686-NEXT: addl $88, %esp 508; CHECK-I686-NEXT: popl %esi 509; CHECK-I686-NEXT: retl 510 %a = load <4 x half>, ptr %p, align 8 511 %b = fpext <4 x half> %a to <4 x float> 512 ret <4 x float> %b 513} 514 515define <4 x double> @test_extend64_vec4(ptr %p) #0 { 516; CHECK-LIBCALL-LABEL: test_extend64_vec4: 517; CHECK-LIBCALL: # %bb.0: 518; CHECK-LIBCALL-NEXT: subq $72, %rsp 519; CHECK-LIBCALL-NEXT: pinsrw 
$0, 4(%rdi), %xmm0 520; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 521; CHECK-LIBCALL-NEXT: pinsrw $0, 6(%rdi), %xmm0 522; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 523; CHECK-LIBCALL-NEXT: pinsrw $0, (%rdi), %xmm0 524; CHECK-LIBCALL-NEXT: movdqa %xmm0, (%rsp) # 16-byte Spill 525; CHECK-LIBCALL-NEXT: pinsrw $0, 2(%rdi), %xmm0 526; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 527; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 528; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 529; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 530; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 531; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 532; CHECK-LIBCALL-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 533; CHECK-LIBCALL-NEXT: # xmm0 = xmm0[0],mem[0] 534; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 535; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 536; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 537; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm0 538; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 539; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 540; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT 541; CHECK-LIBCALL-NEXT: cvtss2sd %xmm0, %xmm1 542; CHECK-LIBCALL-NEXT: unpcklpd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Folded Reload 543; CHECK-LIBCALL-NEXT: # xmm1 = xmm1[0],mem[0] 544; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 545; CHECK-LIBCALL-NEXT: addq $72, %rsp 546; CHECK-LIBCALL-NEXT: retq 547; 548; BWON-F16C-LABEL: test_extend64_vec4: 549; BWON-F16C: # %bb.0: 550; BWON-F16C-NEXT: vcvtph2ps (%rdi), %xmm0 551; BWON-F16C-NEXT: vcvtps2pd %xmm0, %ymm0 552; BWON-F16C-NEXT: retq 553; 554; CHECK-I686-LABEL: test_extend64_vec4: 555; CHECK-I686: # %bb.0: 556; CHECK-I686-NEXT: pushl %esi 557; CHECK-I686-NEXT: subl $104, %esp 558; CHECK-I686-NEXT: movl 
{{[0-9]+}}(%esp), %eax 559; CHECK-I686-NEXT: pinsrw $0, 6(%eax), %xmm0 560; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 561; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0 562; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 563; CHECK-I686-NEXT: pinsrw $0, 2(%eax), %xmm0 564; CHECK-I686-NEXT: pinsrw $0, 4(%eax), %xmm1 565; CHECK-I686-NEXT: pextrw $0, %xmm1, %eax 566; CHECK-I686-NEXT: movw %ax, (%esp) 567; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 568; CHECK-I686-NEXT: calll __extendhfsf2 569; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 570; CHECK-I686-NEXT: movw %si, (%esp) 571; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 572; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 573; CHECK-I686-NEXT: calll __extendhfsf2 574; CHECK-I686-NEXT: fstpt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Spill 575; CHECK-I686-NEXT: movw %si, (%esp) 576; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 577; CHECK-I686-NEXT: pextrw $0, %xmm0, %esi 578; CHECK-I686-NEXT: calll __extendhfsf2 579; CHECK-I686-NEXT: movw %si, (%esp) 580; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) 581; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 582; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) 583; CHECK-I686-NEXT: fldt {{[-0-9]+}}(%e{{[sb]}}p) # 10-byte Folded Reload 584; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) 585; CHECK-I686-NEXT: calll __extendhfsf2 586; CHECK-I686-NEXT: fstpl {{[0-9]+}}(%esp) 587; CHECK-I686-NEXT: movsd {{.*#+}} xmm0 = mem[0],zero 588; CHECK-I686-NEXT: movhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1] 589; CHECK-I686-NEXT: movsd {{.*#+}} xmm1 = mem[0],zero 590; CHECK-I686-NEXT: movhps {{.*#+}} xmm1 = xmm1[0,1],mem[0,1] 591; CHECK-I686-NEXT: addl $104, %esp 592; CHECK-I686-NEXT: popl %esi 593; CHECK-I686-NEXT: retl 594 %a = load <4 x half>, ptr %p, align 8 595 %b = fpext <4 x half> %a to <4 x double> 596 ret <4 x double> %b 597} 598 599define void 
@test_trunc32_vec4(<4 x float> %a, ptr %p) #0 { 600; CHECK-LIBCALL-LABEL: test_trunc32_vec4: 601; CHECK-LIBCALL: # %bb.0: 602; CHECK-LIBCALL-NEXT: pushq %rbx 603; CHECK-LIBCALL-NEXT: subq $64, %rsp 604; CHECK-LIBCALL-NEXT: movq %rdi, %rbx 605; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 606; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[1,1,1,1] 607; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 608; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 609; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 610; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 611; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 612; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 613; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 614; CHECK-LIBCALL-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 615; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 616; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 617; CHECK-LIBCALL-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 618; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT 619; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 620; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 621; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 622; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 623; CHECK-LIBCALL-NEXT: movw %ax, 6(%rbx) 624; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 625; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 626; CHECK-LIBCALL-NEXT: movw %ax, 4(%rbx) 627; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 628; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 629; CHECK-LIBCALL-NEXT: movw %ax, 2(%rbx) 630; CHECK-LIBCALL-NEXT: addq $64, %rsp 631; CHECK-LIBCALL-NEXT: popq %rbx 632; CHECK-LIBCALL-NEXT: retq 633; 634; BWON-F16C-LABEL: test_trunc32_vec4: 635; BWON-F16C: # %bb.0: 636; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, (%rdi) 637; BWON-F16C-NEXT: retq 638; 639; CHECK-I686-LABEL: 
test_trunc32_vec4: 640; CHECK-I686: # %bb.0: 641; CHECK-I686-NEXT: pushl %esi 642; CHECK-I686-NEXT: subl $88, %esp 643; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 644; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 645; CHECK-I686-NEXT: movaps %xmm0, %xmm1 646; CHECK-I686-NEXT: shufps {{.*#+}} xmm1 = xmm1[1,1],xmm0[1,1] 647; CHECK-I686-NEXT: movss %xmm1, (%esp) 648; CHECK-I686-NEXT: calll __truncsfhf2 649; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 650; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 651; CHECK-I686-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 652; CHECK-I686-NEXT: movss %xmm0, (%esp) 653; CHECK-I686-NEXT: calll __truncsfhf2 654; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 655; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 656; CHECK-I686-NEXT: shufps {{.*#+}} xmm0 = xmm0[3,3,3,3] 657; CHECK-I686-NEXT: movss %xmm0, (%esp) 658; CHECK-I686-NEXT: calll __truncsfhf2 659; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 660; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 661; CHECK-I686-NEXT: movd %xmm0, (%esp) 662; CHECK-I686-NEXT: calll __truncsfhf2 663; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 664; CHECK-I686-NEXT: movw %ax, (%esi) 665; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 666; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 667; CHECK-I686-NEXT: movw %ax, 6(%esi) 668; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 669; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 670; CHECK-I686-NEXT: movw %ax, 4(%esi) 671; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 672; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 673; CHECK-I686-NEXT: movw %ax, 2(%esi) 674; CHECK-I686-NEXT: addl $88, %esp 675; CHECK-I686-NEXT: popl %esi 676; CHECK-I686-NEXT: retl 677 %v = fptrunc <4 x float> %a to <4 x half> 678 store <4 x half> %v, 
ptr %p 679 ret void 680} 681 682define void @test_trunc64_vec4(<4 x double> %a, ptr %p) #0 { 683; CHECK-LIBCALL-LABEL: test_trunc64_vec4: 684; CHECK-LIBCALL: # %bb.0: 685; CHECK-LIBCALL-NEXT: pushq %rbx 686; CHECK-LIBCALL-NEXT: subq $64, %rsp 687; CHECK-LIBCALL-NEXT: movq %rdi, %rbx 688; CHECK-LIBCALL-NEXT: movaps %xmm1, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 689; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 690; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 691; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 692; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 693; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 694; CHECK-LIBCALL-NEXT: movhlps {{.*#+}} xmm0 = xmm0[1,1] 695; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 696; CHECK-LIBCALL-NEXT: movaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 697; CHECK-LIBCALL-NEXT: movaps (%rsp), %xmm0 # 16-byte Reload 698; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 699; CHECK-LIBCALL-NEXT: movaps %xmm0, (%rsp) # 16-byte Spill 700; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 701; CHECK-LIBCALL-NEXT: callq __truncdfhf2@PLT 702; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 703; CHECK-LIBCALL-NEXT: movw %ax, 4(%rbx) 704; CHECK-LIBCALL-NEXT: movdqa (%rsp), %xmm0 # 16-byte Reload 705; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 706; CHECK-LIBCALL-NEXT: movw %ax, (%rbx) 707; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 708; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 709; CHECK-LIBCALL-NEXT: movw %ax, 6(%rbx) 710; CHECK-LIBCALL-NEXT: movdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload 711; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax 712; CHECK-LIBCALL-NEXT: movw %ax, 2(%rbx) 713; CHECK-LIBCALL-NEXT: addq $64, %rsp 714; CHECK-LIBCALL-NEXT: popq %rbx 715; CHECK-LIBCALL-NEXT: retq 716; 717; BWON-F16C-LABEL: test_trunc64_vec4: 718; BWON-F16C: # %bb.0: 719; BWON-F16C-NEXT: pushq %rbx 720; BWON-F16C-NEXT: 
subq $64, %rsp 721; BWON-F16C-NEXT: movq %rdi, %rbx 722; BWON-F16C-NEXT: vmovups %ymm0, {{[-0-9]+}}(%r{{[sb]}}p) # 32-byte Spill 723; BWON-F16C-NEXT: vextractf128 $1, %ymm0, %xmm0 724; BWON-F16C-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 725; BWON-F16C-NEXT: vzeroupper 726; BWON-F16C-NEXT: callq __truncdfhf2@PLT 727; BWON-F16C-NEXT: vmovaps %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 728; BWON-F16C-NEXT: vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload 729; BWON-F16C-NEXT: # xmm0 = mem[1,0] 730; BWON-F16C-NEXT: callq __truncdfhf2@PLT 731; BWON-F16C-NEXT: vmovdqa {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 16-byte Reload 732; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 733; BWON-F16C-NEXT: vmovdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill 734; BWON-F16C-NEXT: vmovups {{[-0-9]+}}(%r{{[sb]}}p), %ymm0 # 32-byte Reload 735; BWON-F16C-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 736; BWON-F16C-NEXT: vzeroupper 737; BWON-F16C-NEXT: callq __truncdfhf2@PLT 738; BWON-F16C-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 739; BWON-F16C-NEXT: vpermilpd $1, {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Folded Reload 740; BWON-F16C-NEXT: # xmm0 = mem[1,0] 741; BWON-F16C-NEXT: callq __truncdfhf2@PLT 742; BWON-F16C-NEXT: vmovdqa (%rsp), %xmm1 # 16-byte Reload 743; BWON-F16C-NEXT: vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 744; BWON-F16C-NEXT: vpunpckldq {{[-0-9]+}}(%r{{[sb]}}p), %xmm0, %xmm0 # 16-byte Folded Reload 745; BWON-F16C-NEXT: # xmm0 = xmm0[0],mem[0],xmm0[1],mem[1] 746; BWON-F16C-NEXT: vmovq %xmm0, (%rbx) 747; BWON-F16C-NEXT: addq $64, %rsp 748; BWON-F16C-NEXT: popq %rbx 749; BWON-F16C-NEXT: retq 750; 751; CHECK-I686-LABEL: test_trunc64_vec4: 752; CHECK-I686: # %bb.0: 753; CHECK-I686-NEXT: pushl %esi 754; CHECK-I686-NEXT: subl $88, %esp 755; CHECK-I686-NEXT: movaps %xmm1, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 756; CHECK-I686-NEXT: movaps %xmm0, 
{{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 757; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %esi 758; CHECK-I686-NEXT: movlps %xmm0, (%esp) 759; CHECK-I686-NEXT: calll __truncdfhf2 760; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 761; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 762; CHECK-I686-NEXT: movhps %xmm0, (%esp) 763; CHECK-I686-NEXT: calll __truncdfhf2 764; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 765; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 766; CHECK-I686-NEXT: movlps %xmm0, (%esp) 767; CHECK-I686-NEXT: calll __truncdfhf2 768; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill 769; CHECK-I686-NEXT: movaps {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 770; CHECK-I686-NEXT: movhps %xmm0, (%esp) 771; CHECK-I686-NEXT: calll __truncdfhf2 772; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 773; CHECK-I686-NEXT: movw %ax, 6(%esi) 774; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 775; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 776; CHECK-I686-NEXT: movw %ax, 4(%esi) 777; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 778; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 779; CHECK-I686-NEXT: movw %ax, 2(%esi) 780; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload 781; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax 782; CHECK-I686-NEXT: movw %ax, (%esi) 783; CHECK-I686-NEXT: addl $88, %esp 784; CHECK-I686-NEXT: popl %esi 785; CHECK-I686-NEXT: retl 786 %v = fptrunc <4 x double> %a to <4 x half> 787 store <4 x half> %v, ptr %p 788 ret void 789} 790 791declare float @test_floatret(); 792 793; On i686, if SSE2 is available, the return value from test_floatret is loaded 794; to f80 and then rounded to f32. The DAG combiner should not combine this 795; fp_round and the subsequent fptrunc from float to half. 
; The i686 expectations below keep the fstps store of the x87 return value
; ahead of the separate __truncsfhf2 call.
define half @test_f80trunc_nodagcombine() #0 {
; CHECK-LIBCALL-LABEL: test_f80trunc_nodagcombine:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: callq test_floatret@PLT
; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT: popq %rax
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: test_f80trunc_nodagcombine:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: pushq %rax
; BWON-F16C-NEXT: callq test_floatret@PLT
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vmovd %xmm0, %eax
; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; BWON-F16C-NEXT: popq %rax
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_f80trunc_nodagcombine:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: subl $12, %esp
; CHECK-I686-NEXT: calll test_floatret@PLT
; CHECK-I686-NEXT: fstps (%esp)
; CHECK-I686-NEXT: calll __truncsfhf2
; CHECK-I686-NEXT: addl $12, %esp
; CHECK-I686-NEXT: retl
  %1 = call float @test_floatret()
  %2 = fptrunc float %1 to half
  ret half %2
}




; Adds the half loaded from %b to the half conversion of the i32 %a and returns
; the sum extended to float. Without F16C the half<->float steps are expected to
; be __truncsfhf2/__extendhfsf2 calls; with F16C they are expected to be
; vcvtps2ph/vcvtph2ps instructions.
define float @test_sitofp_fadd_i32(i32 %a, ptr %b) #0 {
; CHECK-LIBCALL-LABEL: test_sitofp_fadd_i32:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: subq $40, %rsp
; CHECK-LIBCALL-NEXT: pinsrw $0, (%rsi), %xmm0
; CHECK-LIBCALL-NEXT: movdqa %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 16-byte Spill
; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0
; CHECK-LIBCALL-NEXT: cvtsi2ss %edi, %xmm0
; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT: movaps {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT: movss %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 4-byte Spill
; CHECK-LIBCALL-NEXT: movss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Reload
; CHECK-LIBCALL-NEXT: # xmm0 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT: addss {{[-0-9]+}}(%r{{[sb]}}p), %xmm0 # 4-byte Folded Reload
; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT: addq $40, %rsp
; CHECK-LIBCALL-NEXT: jmp __extendhfsf2@PLT # TAILCALL
;
; BWON-F16C-LABEL: test_sitofp_fadd_i32:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: movzwl (%rsi), %eax
; BWON-F16C-NEXT: vcvtsi2ss %edi, %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vmovd %eax, %xmm1
; BWON-F16C-NEXT: vcvtph2ps %xmm1, %xmm1
; BWON-F16C-NEXT: vaddss %xmm0, %xmm1, %xmm0
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vmovd %xmm0, %eax
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_sitofp_fadd_i32:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: subl $60, %esp
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
; CHECK-I686-NEXT: movdqa %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT: xorps %xmm0, %xmm0
; CHECK-I686-NEXT: cvtsi2ssl {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncsfhf2
; CHECK-I686-NEXT: movaps %xmm0, {{[-0-9]+}}(%e{{[sb]}}p) # 16-byte Spill
; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movw %ax, (%esp)
; CHECK-I686-NEXT: calll __extendhfsf2
; CHECK-I686-NEXT: movdqa {{[-0-9]+}}(%e{{[sb]}}p), %xmm0 # 16-byte Reload
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movw %ax, (%esp)
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: calll __extendhfsf2
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: addss {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncsfhf2
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movw %ax, (%esp)
; CHECK-I686-NEXT: calll __extendhfsf2
; CHECK-I686-NEXT: addl $60, %esp
; CHECK-I686-NEXT: retl
  %tmp0 = load half, ptr %b
  %tmp1 = sitofp i32 %a to half
  %tmp2 = fadd half %tmp0, %tmp1
  %tmp3 = fpext half %tmp2 to float
  ret float %tmp3
}

; Returns the i1 result of an unordered-or-not-equal compare of the argument
; against +0.0, converted to half with uitofp. The expected code picks between
; the integer constants 0 and 0x3C00 with cmov instead of converting at run
; time.
; NOTE(review): presumably a regression test for bug report PR40273 - confirm.
define half @PR40273(half) #0 {
; CHECK-LIBCALL-LABEL: PR40273:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT: xorl %eax, %eax
; CHECK-LIBCALL-NEXT: xorps %xmm1, %xmm1
; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0
; CHECK-LIBCALL-NEXT: movl $15360, %ecx # imm = 0x3C00
; CHECK-LIBCALL-NEXT: cmovnel %ecx, %eax
; CHECK-LIBCALL-NEXT: cmovpl %ecx, %eax
; CHECK-LIBCALL-NEXT: pinsrw $0, %eax, %xmm0
; CHECK-LIBCALL-NEXT: popq %rax
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: PR40273:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: xorl %eax, %eax
; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
; BWON-F16C-NEXT: movl $15360, %ecx # imm = 0x3C00
; BWON-F16C-NEXT: cmovnel %ecx, %eax
; BWON-F16C-NEXT: cmovpl %ecx, %eax
; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: PR40273:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: subl $12, %esp
; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movw %ax, (%esp)
; CHECK-I686-NEXT: calll __extendhfsf2
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: xorl %eax, %eax
; CHECK-I686-NEXT: xorps %xmm1, %xmm1
; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0
; CHECK-I686-NEXT: movl $15360, %ecx # imm = 0x3C00
; CHECK-I686-NEXT: cmovnel %ecx, %eax
; CHECK-I686-NEXT: cmovpl %ecx, %eax
; CHECK-I686-NEXT: pinsrw $0, %eax, %xmm0
; CHECK-I686-NEXT: addl $12, %esp
; CHECK-I686-NEXT: retl
  %2 = fcmp une half %0, 0xH0000
  %3 = uitofp i1 %2 to half
  ret half %3
}

; Branches on an ordered-equal compare of the half argument against +0.0; the
; false successor is unreachable. This function does not use attribute group
; #0, and its expected output includes .cfi directives.
define dso_local void @brcond(half %0) {
; CHECK-LIBCALL-LABEL: brcond:
; CHECK-LIBCALL: # %bb.0: # %entry
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16
; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT: xorps %xmm1, %xmm1
; CHECK-LIBCALL-NEXT: ucomiss %xmm1, %xmm0
; CHECK-LIBCALL-NEXT: setp %al
; CHECK-LIBCALL-NEXT: setne %cl
; CHECK-LIBCALL-NEXT: orb %al, %cl
; CHECK-LIBCALL-NEXT: jne .LBB18_2
; CHECK-LIBCALL-NEXT: # %bb.1: # %if.then
; CHECK-LIBCALL-NEXT: popq %rax
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 8
; CHECK-LIBCALL-NEXT: retq
; CHECK-LIBCALL-NEXT: .LBB18_2: # %if.end
;
; BWON-F16C-LABEL: brcond:
; BWON-F16C: # %bb.0: # %entry
; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vxorps %xmm1, %xmm1, %xmm1
; BWON-F16C-NEXT: vucomiss %xmm1, %xmm0
; BWON-F16C-NEXT: setp %al
; BWON-F16C-NEXT: setne %cl
; BWON-F16C-NEXT: orb %al, %cl
; BWON-F16C-NEXT: jne .LBB18_2
; BWON-F16C-NEXT: # %bb.1: # %if.then
; BWON-F16C-NEXT: retq
; BWON-F16C-NEXT: .LBB18_2: # %if.end
;
; CHECK-I686-LABEL: brcond:
; CHECK-I686: # %bb.0: # %entry
; CHECK-I686-NEXT: subl $12, %esp
; CHECK-I686-NEXT: .cfi_def_cfa_offset 16
; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movw %ax, (%esp)
; CHECK-I686-NEXT: calll __extendhfsf2
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: xorps %xmm1, %xmm1
; CHECK-I686-NEXT: ucomiss %xmm1, %xmm0
; CHECK-I686-NEXT: setp %al
; CHECK-I686-NEXT: setne %cl
; CHECK-I686-NEXT: orb %al, %cl
; CHECK-I686-NEXT: jne .LBB18_2
; CHECK-I686-NEXT: # %bb.1: # %if.then
; CHECK-I686-NEXT: addl $12, %esp
; CHECK-I686-NEXT: .cfi_def_cfa_offset 4
; CHECK-I686-NEXT: retl
; CHECK-I686-NEXT: .LBB18_2: # %if.end
entry:
  %cmp = fcmp oeq half 0xH0000, %0
  br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
  ret void

if.end: ; preds = %entry
  unreachable
}

; llvm.sqrt.f16 is expected to be computed in float: extend the argument, use
; sqrtss/vsqrtss, then truncate the result back to half.
define half @test_sqrt(half %0) {
; CHECK-LIBCALL-LABEL: test_sqrt:
; CHECK-LIBCALL: # %bb.0: # %entry
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16
; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT: sqrtss %xmm0, %xmm0
; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT: popq %rax
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 8
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: test_sqrt:
; BWON-F16C: # %bb.0: # %entry
; BWON-F16C-NEXT: vpextrw $0, %xmm0, %eax
; BWON-F16C-NEXT: movzwl %ax, %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vsqrtss %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vmovd %xmm0, %eax
; BWON-F16C-NEXT: vpinsrw $0, %eax, %xmm0, %xmm0
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: test_sqrt:
; CHECK-I686: # %bb.0: # %entry
; CHECK-I686-NEXT: subl $12, %esp
; CHECK-I686-NEXT: .cfi_def_cfa_offset 16
; CHECK-I686-NEXT: pinsrw $0, {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movw %ax, (%esp)
; CHECK-I686-NEXT: calll __extendhfsf2
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: sqrtss %xmm0, %xmm0
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncsfhf2
; CHECK-I686-NEXT: addl $12, %esp
; CHECK-I686-NEXT: .cfi_def_cfa_offset 4
; CHECK-I686-NEXT: retl
entry:
  %1 = call half @llvm.sqrt.f16(half %0)
  ret half %1
}

; Intrinsic declaration used by test_sqrt.
declare half @llvm.sqrt.f16(half)

; Takes fabs of an undef half, compares it (ole) against 0xH4800, selects a
; float on that result, truncates it to half and stores it through an undef
; pointer. The values %1 through %subtract.65 do not feed the stored result.
; NOTE(review): looks like a reduced test case (the name and the undef operands
; suggest it was cut down from a larger function) - confirm before simplifying.
define void @main.158() local_unnamed_addr #0 {
; CHECK-LIBCALL-LABEL: main.158:
; CHECK-LIBCALL: # %bb.0: # %entry
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0
; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT: ucomiss %xmm0, %xmm1
; CHECK-LIBCALL-NEXT: xorps %xmm0, %xmm0
; CHECK-LIBCALL-NEXT: jae .LBB20_2
; CHECK-LIBCALL-NEXT: # %bb.1: # %entry
; CHECK-LIBCALL-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-LIBCALL-NEXT: .LBB20_2: # %entry
; CHECK-LIBCALL-NEXT: callq __truncsfhf2@PLT
; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
; CHECK-LIBCALL-NEXT: movw %ax, (%rax)
; CHECK-LIBCALL-NEXT: popq %rax
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: main.158:
; BWON-F16C: # %bb.0: # %entry
; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; BWON-F16C-NEXT: vucomiss %xmm0, %xmm1
; BWON-F16C-NEXT: vxorps %xmm0, %xmm0, %xmm0
; BWON-F16C-NEXT: jae .LBB20_2
; BWON-F16C-NEXT: # %bb.1: # %entry
; BWON-F16C-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; BWON-F16C-NEXT: .LBB20_2: # %entry
; BWON-F16C-NEXT: vcvtps2ph $4, %xmm0, %xmm0
; BWON-F16C-NEXT: vmovd %xmm0, %eax
; BWON-F16C-NEXT: movw %ax, (%rax)
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: main.158:
; CHECK-I686: # %bb.0: # %entry
; CHECK-I686-NEXT: subl $12, %esp
; CHECK-I686-NEXT: pxor %xmm0, %xmm0
; CHECK-I686-NEXT: movd %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncsfhf2
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movw %ax, (%esp)
; CHECK-I686-NEXT: calll __extendhfsf2
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: ucomiss {{[0-9]+}}(%esp), %xmm0
; CHECK-I686-NEXT: xorps %xmm0, %xmm0
; CHECK-I686-NEXT: jae .LBB20_2
; CHECK-I686-NEXT: # %bb.1: # %entry
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: .LBB20_2: # %entry
; CHECK-I686-NEXT: movss %xmm0, (%esp)
; CHECK-I686-NEXT: calll __truncsfhf2
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movw %ax, (%eax)
; CHECK-I686-NEXT: addl $12, %esp
; CHECK-I686-NEXT: retl
entry:
  %0 = tail call half @llvm.fabs.f16(half undef)
  %1 = fpext half %0 to float
  %compare.2 = fcmp ole half %0, 0xH4800
  %multiply.95 = fmul float %1, 5.000000e-01
  %add.82 = fadd float %multiply.95, -2.000000e+00
  %multiply.68 = fmul float %add.82, 0.000000e+00
  %subtract.65 = fsub float %multiply.68, 0.000000e+00
  %multiply.57 = fmul float undef, 0.000000e+00
  %2 = select i1 %compare.2, float 0.000000e+00, float %multiply.57
  %3 = fptrunc float %2 to half
  store half %3, ptr undef, align 2
  ret void
}

; Splats a half loaded through an undef pointer into <4 x half> and <4 x i16>
; vectors, selects 0xH7E00 for the lanes where the unordered compare against
; zero is true, and stores the <4 x half> result through an undef pointer. This
; function does not use attribute group #0, and its expected output includes
; .cfi directives.
; NOTE(review): also looks like a reduced test case (undef pointers and undef
; select conditions) - confirm before simplifying.
define void @main.45() local_unnamed_addr {
; CHECK-LIBCALL-LABEL: main.45:
; CHECK-LIBCALL: # %bb.0: # %entry
; CHECK-LIBCALL-NEXT: pushq %rbp
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16
; CHECK-LIBCALL-NEXT: pushq %r15
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 24
; CHECK-LIBCALL-NEXT: pushq %r14
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 32
; CHECK-LIBCALL-NEXT: pushq %rbx
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 40
; CHECK-LIBCALL-NEXT: pushq %rax
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 48
; CHECK-LIBCALL-NEXT: .cfi_offset %rbx, -40
; CHECK-LIBCALL-NEXT: .cfi_offset %r14, -32
; CHECK-LIBCALL-NEXT: .cfi_offset %r15, -24
; CHECK-LIBCALL-NEXT: .cfi_offset %rbp, -16
; CHECK-LIBCALL-NEXT: pinsrw $0, (%rax), %xmm0
; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %eax
; CHECK-LIBCALL-NEXT: movd %eax, %xmm1
; CHECK-LIBCALL-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[0,0,0,0,4,5,6,7]
; CHECK-LIBCALL-NEXT: movq %xmm1, %rbx
; CHECK-LIBCALL-NEXT: movq %rbx, %r14
; CHECK-LIBCALL-NEXT: shrq $48, %r14
; CHECK-LIBCALL-NEXT: movq %rbx, %r15
; CHECK-LIBCALL-NEXT: shrq $32, %r15
; CHECK-LIBCALL-NEXT: movl %ebx, %ebp
; CHECK-LIBCALL-NEXT: shrl $16, %ebp
; CHECK-LIBCALL-NEXT: callq __extendhfsf2@PLT
; CHECK-LIBCALL-NEXT: ucomiss %xmm0, %xmm0
; CHECK-LIBCALL-NEXT: movl $32256, %eax # imm = 0x7E00
; CHECK-LIBCALL-NEXT: cmovpl %eax, %ebp
; CHECK-LIBCALL-NEXT: cmovpl %eax, %r15d
; CHECK-LIBCALL-NEXT: cmovpl %eax, %r14d
; CHECK-LIBCALL-NEXT: cmovpl %eax, %ebx
; CHECK-LIBCALL-NEXT: movw %bx, (%rax)
; CHECK-LIBCALL-NEXT: movw %r14w, (%rax)
; CHECK-LIBCALL-NEXT: movw %r15w, (%rax)
; CHECK-LIBCALL-NEXT: movw %bp, (%rax)
; CHECK-LIBCALL-NEXT: addq $8, %rsp
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 40
; CHECK-LIBCALL-NEXT: popq %rbx
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 32
; CHECK-LIBCALL-NEXT: popq %r14
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 24
; CHECK-LIBCALL-NEXT: popq %r15
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 16
; CHECK-LIBCALL-NEXT: popq %rbp
; CHECK-LIBCALL-NEXT: .cfi_def_cfa_offset 8
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: main.45:
; BWON-F16C: # %bb.0: # %entry
; BWON-F16C-NEXT: movzwl (%rax), %eax
; BWON-F16C-NEXT: vmovd %eax, %xmm0
; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,0,0,4,5,6,7]
; BWON-F16C-NEXT: vcvtph2ps %xmm0, %xmm0
; BWON-F16C-NEXT: xorl %eax, %eax
; BWON-F16C-NEXT: vucomiss %xmm0, %xmm0
; BWON-F16C-NEXT: movl $65535, %ecx # imm = 0xFFFF
; BWON-F16C-NEXT: cmovnpl %eax, %ecx
; BWON-F16C-NEXT: vmovd %ecx, %xmm0
; BWON-F16C-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; BWON-F16C-NEXT: vpblendvb %xmm0, {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; BWON-F16C-NEXT: vmovq %xmm0, (%rax)
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: main.45:
; CHECK-I686: # %bb.0: # %entry
; CHECK-I686-NEXT: pushl %edi
; CHECK-I686-NEXT: .cfi_def_cfa_offset 8
; CHECK-I686-NEXT: pushl %esi
; CHECK-I686-NEXT: .cfi_def_cfa_offset 12
; CHECK-I686-NEXT: subl $20, %esp
; CHECK-I686-NEXT: .cfi_def_cfa_offset 32
; CHECK-I686-NEXT: .cfi_offset %esi, -12
; CHECK-I686-NEXT: .cfi_offset %edi, -8
; CHECK-I686-NEXT: pinsrw $0, (%eax), %xmm0
; CHECK-I686-NEXT: pextrw $0, %xmm0, %eax
; CHECK-I686-NEXT: movd %eax, %xmm0
; CHECK-I686-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; CHECK-I686-NEXT: movd %xmm0, %esi
; CHECK-I686-NEXT: movl %esi, %edi
; CHECK-I686-NEXT: shrl $16, %edi
; CHECK-I686-NEXT: movw %ax, (%esp)
; CHECK-I686-NEXT: calll __extendhfsf2
; CHECK-I686-NEXT: fstps {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: movss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; CHECK-I686-NEXT: ucomiss %xmm0, %xmm0
; CHECK-I686-NEXT: movl $32256, %eax # imm = 0x7E00
; CHECK-I686-NEXT: cmovpl %eax, %esi
; CHECK-I686-NEXT: cmovpl %eax, %edi
; CHECK-I686-NEXT: movw %di, (%eax)
; CHECK-I686-NEXT: movw %si, (%eax)
; CHECK-I686-NEXT: addl $20, %esp
; CHECK-I686-NEXT: .cfi_def_cfa_offset 12
; CHECK-I686-NEXT: popl %esi
; CHECK-I686-NEXT: .cfi_def_cfa_offset 8
; CHECK-I686-NEXT: popl %edi
; CHECK-I686-NEXT: .cfi_def_cfa_offset 4
; CHECK-I686-NEXT: retl
entry:
  %0 = load half, ptr undef, align 8
  %1 = bitcast half %0 to i16
  %broadcast.splatinsert = insertelement <4 x half> poison, half %0, i64 0
  %broadcast.splat = shufflevector <4 x half> %broadcast.splatinsert, <4 x half> poison, <4 x i32> zeroinitializer
  %broadcast.splatinsert13 = insertelement <4 x i16> poison, i16 %1, i64 0
  %broadcast.splat14 = shufflevector <4 x i16> %broadcast.splatinsert13, <4 x i16> poison, <4 x i32> zeroinitializer
  %2 = fcmp uno <4 x half> %broadcast.splat, zeroinitializer
  %3 = add <4 x i16> zeroinitializer, %broadcast.splat14
  %4 = select i1 undef, <4 x i16> undef, <4 x i16> %3
  %5 = select <4 x i1> undef, <4 x i16> undef, <4 x i16> %4
  %6 = bitcast <4 x i16> %5 to <4 x half>
  %7 = select <4 x i1> %2, <4 x half> <half 0xH7E00, half 0xH7E00, half 0xH7E00, half 0xH7E00>, <4 x half> %6
  store <4 x half> %7, ptr undef, align 16
  ret void
}

; llvm.copysign.f16 is expected to lower to integer bit operations with no
; conversion calls in any configuration: one operand is masked with 0x8000
; (sign bit), the other with 0x7FFF (remaining bits), and the two are ORed.
define half @fcopysign(half %x, half %y) {
; CHECK-LIBCALL-LABEL: fcopysign:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: pextrw $0, %xmm1, %eax
; CHECK-LIBCALL-NEXT: andl $-32768, %eax # imm = 0x8000
; CHECK-LIBCALL-NEXT: pextrw $0, %xmm0, %ecx
; CHECK-LIBCALL-NEXT: andl $32767, %ecx # imm = 0x7FFF
; CHECK-LIBCALL-NEXT: orl %eax, %ecx
; CHECK-LIBCALL-NEXT: pinsrw $0, %ecx, %xmm0
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: fcopysign:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vpextrw $0, %xmm1, %eax
; BWON-F16C-NEXT: andl $-32768, %eax # imm = 0x8000
; BWON-F16C-NEXT: vpextrw $0, %xmm0, %ecx
; BWON-F16C-NEXT: andl $32767, %ecx # imm = 0x7FFF
; BWON-F16C-NEXT: orl %eax, %ecx
; BWON-F16C-NEXT: vpinsrw $0, %ecx, %xmm0, %xmm0
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: fcopysign:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: movl $-32768, %eax # imm = 0x8000
; CHECK-I686-NEXT: andl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movzwl {{[0-9]+}}(%esp), %ecx
; CHECK-I686-NEXT: andl $32767, %ecx # imm = 0x7FFF
; CHECK-I686-NEXT: orl %eax, %ecx
; CHECK-I686-NEXT: pinsrw $0, %ecx, %xmm0
; CHECK-I686-NEXT: retl
  %a = call half @llvm.copysign.f16(half %x, half %y)
  ret half %a
}

; Intrinsic declarations used by main.158 (fabs) and fcopysign (copysign).
declare half @llvm.fabs.f16(half)
declare half @llvm.copysign.f16(half, half)

; Scalar-condition select between two <8 x half> vectors; the expected code
; tests bit 0 of %c and conditionally copies one vector register over the
; other.
define <8 x half> @select(i1 %c, <8 x half> %x, <8 x half> %y) {
; CHECK-LIBCALL-LABEL: select:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: testb $1, %dil
; CHECK-LIBCALL-NEXT: jne .LBB23_2
; CHECK-LIBCALL-NEXT: # %bb.1:
; CHECK-LIBCALL-NEXT: movaps %xmm1, %xmm0
; CHECK-LIBCALL-NEXT: .LBB23_2:
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: select:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: testb $1, %dil
; BWON-F16C-NEXT: jne .LBB23_2
; BWON-F16C-NEXT: # %bb.1:
; BWON-F16C-NEXT: vmovaps %xmm1, %xmm0
; BWON-F16C-NEXT: .LBB23_2:
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: select:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: testb $1, {{[0-9]+}}(%esp)
; CHECK-I686-NEXT: jne .LBB23_2
; CHECK-I686-NEXT: # %bb.1:
; CHECK-I686-NEXT: movaps %xmm1, %xmm0
; CHECK-I686-NEXT: .LBB23_2:
; CHECK-I686-NEXT: retl
  %s = select i1 %c, <8 x half> %x, <8 x half> %y
  ret <8 x half> %s
}

; Loads an <8 x half> vector and shuffles it so that result lanes 0 and 1 both
; take source element 4; the remaining mask entries are undef. The expected
; code is a pshufhw/pshufd pair.
define <8 x half> @shuffle(ptr %p) {
; CHECK-LIBCALL-LABEL: shuffle:
; CHECK-LIBCALL: # %bb.0:
; CHECK-LIBCALL-NEXT: movdqu (%rdi), %xmm0
; CHECK-LIBCALL-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; CHECK-LIBCALL-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
; CHECK-LIBCALL-NEXT: retq
;
; BWON-F16C-LABEL: shuffle:
; BWON-F16C: # %bb.0:
; BWON-F16C-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,4,4,4,4]
; BWON-F16C-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
; BWON-F16C-NEXT: retq
;
; CHECK-I686-LABEL: shuffle:
; CHECK-I686: # %bb.0:
; CHECK-I686-NEXT: movl {{[0-9]+}}(%esp), %eax
; CHECK-I686-NEXT: movdqu (%eax), %xmm0
; CHECK-I686-NEXT: pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,4,4,4]
; CHECK-I686-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
; CHECK-I686-NEXT: retl
  %1 = load <8 x half>, ptr %p, align 8
  %2 = shufflevector <8 x half> %1, <8 x half> poison, <8 x i32> <i32 4, i32 4, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <8 x half> %2
}

; Attribute group #0 marks the functions that reference it as nounwind.
attributes #0 = { nounwind }