1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,-f16c | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512VL 6 7; 8; Half to Float 9; 10 11define float @cvt_i16_to_f32(i16 %a0) nounwind { 12; AVX1-LABEL: cvt_i16_to_f32: 13; AVX1: # BB#0: 14; AVX1-NEXT: movswl %di, %eax 15; AVX1-NEXT: vmovd %eax, %xmm0 16; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 17; AVX1-NEXT: retq 18; 19; AVX2-LABEL: cvt_i16_to_f32: 20; AVX2: # BB#0: 21; AVX2-NEXT: movswl %di, %eax 22; AVX2-NEXT: vmovd %eax, %xmm0 23; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 24; AVX2-NEXT: retq 25; 26; AVX512F-LABEL: cvt_i16_to_f32: 27; AVX512F: # BB#0: 28; AVX512F-NEXT: movswl %di, %eax 29; AVX512F-NEXT: vmovd %eax, %xmm0 30; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 31; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> 32; AVX512F-NEXT: retq 33; 34; AVX512VL-LABEL: cvt_i16_to_f32: 35; AVX512VL: # BB#0: 36; AVX512VL-NEXT: movswl %di, %eax 37; AVX512VL-NEXT: vmovd %eax, %xmm0 38; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 39; AVX512VL-NEXT: retq 40 %1 = bitcast i16 %a0 to half 41 %2 = fpext half %1 to float 42 ret float %2 43} 44 45define <4 x float> @cvt_4i16_to_4f32(<4 x i16> %a0) nounwind { 46; AVX1-LABEL: cvt_4i16_to_4f32: 47; AVX1: # BB#0: 48; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 49; AVX1-NEXT: vmovq %xmm0, %rax 50; AVX1-NEXT: movq %rax, %rcx 51; AVX1-NEXT: movq %rax, %rdx 52; AVX1-NEXT: movswl %ax, 
%esi 53; AVX1-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 54; AVX1-NEXT: shrl $16, %eax 55; AVX1-NEXT: shrq $32, %rcx 56; AVX1-NEXT: shrq $48, %rdx 57; AVX1-NEXT: movswl %dx, %edx 58; AVX1-NEXT: vmovd %edx, %xmm0 59; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 60; AVX1-NEXT: movswl %cx, %ecx 61; AVX1-NEXT: vmovd %ecx, %xmm1 62; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 63; AVX1-NEXT: cwtl 64; AVX1-NEXT: vmovd %eax, %xmm2 65; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2 66; AVX1-NEXT: vmovd %esi, %xmm3 67; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 68; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 69; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 70; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 71; AVX1-NEXT: retq 72; 73; AVX2-LABEL: cvt_4i16_to_4f32: 74; AVX2: # BB#0: 75; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 76; AVX2-NEXT: vmovq %xmm0, %rax 77; AVX2-NEXT: movq %rax, %rcx 78; AVX2-NEXT: movq %rax, %rdx 79; AVX2-NEXT: movswl %ax, %esi 80; AVX2-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 81; AVX2-NEXT: shrl $16, %eax 82; AVX2-NEXT: shrq $32, %rcx 83; AVX2-NEXT: shrq $48, %rdx 84; AVX2-NEXT: movswl %dx, %edx 85; AVX2-NEXT: vmovd %edx, %xmm0 86; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 87; AVX2-NEXT: movswl %cx, %ecx 88; AVX2-NEXT: vmovd %ecx, %xmm1 89; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 90; AVX2-NEXT: cwtl 91; AVX2-NEXT: vmovd %eax, %xmm2 92; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 93; AVX2-NEXT: vmovd %esi, %xmm3 94; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 95; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 96; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 97; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 98; AVX2-NEXT: retq 99; 100; AVX512F-LABEL: cvt_4i16_to_4f32: 101; AVX512F: # BB#0: 102; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 103; AVX512F-NEXT: vmovq %xmm0, %rax 104; AVX512F-NEXT: movq %rax, %rcx 105; AVX512F-NEXT: movq %rax, %rdx 106; 
AVX512F-NEXT: movswl %ax, %esi 107; AVX512F-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 108; AVX512F-NEXT: shrl $16, %eax 109; AVX512F-NEXT: shrq $32, %rcx 110; AVX512F-NEXT: shrq $48, %rdx 111; AVX512F-NEXT: movswl %dx, %edx 112; AVX512F-NEXT: vmovd %edx, %xmm0 113; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 114; AVX512F-NEXT: movswl %cx, %ecx 115; AVX512F-NEXT: vmovd %ecx, %xmm1 116; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 117; AVX512F-NEXT: cwtl 118; AVX512F-NEXT: vmovd %eax, %xmm2 119; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm2 120; AVX512F-NEXT: vmovd %esi, %xmm3 121; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 122; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 123; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 124; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 125; AVX512F-NEXT: retq 126; 127; AVX512VL-LABEL: cvt_4i16_to_4f32: 128; AVX512VL: # BB#0: 129; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp) 130; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax 131; AVX512VL-NEXT: movq %rax, %rcx 132; AVX512VL-NEXT: movq %rax, %rdx 133; AVX512VL-NEXT: movswl %ax, %esi 134; AVX512VL-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 135; AVX512VL-NEXT: shrl $16, %eax 136; AVX512VL-NEXT: shrq $32, %rcx 137; AVX512VL-NEXT: shrq $48, %rdx 138; AVX512VL-NEXT: movswl %dx, %edx 139; AVX512VL-NEXT: vmovd %edx, %xmm0 140; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 141; AVX512VL-NEXT: movswl %cx, %ecx 142; AVX512VL-NEXT: vmovd %ecx, %xmm1 143; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 144; AVX512VL-NEXT: cwtl 145; AVX512VL-NEXT: vmovd %eax, %xmm2 146; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 147; AVX512VL-NEXT: vmovd %esi, %xmm3 148; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 149; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 150; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 151; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 152; AVX512VL-NEXT: retq 153 %1 = bitcast <4 x i16> %a0 to <4 x half> 154 %2 = fpext <4 
x half> %1 to <4 x float> 155 ret <4 x float> %2 156} 157 158define <4 x float> @cvt_8i16_to_4f32(<8 x i16> %a0) nounwind { 159; AVX1-LABEL: cvt_8i16_to_4f32: 160; AVX1: # BB#0: 161; AVX1-NEXT: vmovq %xmm0, %rax 162; AVX1-NEXT: movq %rax, %rcx 163; AVX1-NEXT: movq %rax, %rdx 164; AVX1-NEXT: movswl %ax, %esi 165; AVX1-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 166; AVX1-NEXT: shrl $16, %eax 167; AVX1-NEXT: shrq $32, %rcx 168; AVX1-NEXT: shrq $48, %rdx 169; AVX1-NEXT: movswl %dx, %edx 170; AVX1-NEXT: vmovd %edx, %xmm0 171; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 172; AVX1-NEXT: movswl %cx, %ecx 173; AVX1-NEXT: vmovd %ecx, %xmm1 174; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 175; AVX1-NEXT: cwtl 176; AVX1-NEXT: vmovd %eax, %xmm2 177; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2 178; AVX1-NEXT: vmovd %esi, %xmm3 179; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 180; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 181; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 182; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 183; AVX1-NEXT: retq 184; 185; AVX2-LABEL: cvt_8i16_to_4f32: 186; AVX2: # BB#0: 187; AVX2-NEXT: vmovq %xmm0, %rax 188; AVX2-NEXT: movq %rax, %rcx 189; AVX2-NEXT: movq %rax, %rdx 190; AVX2-NEXT: movswl %ax, %esi 191; AVX2-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 192; AVX2-NEXT: shrl $16, %eax 193; AVX2-NEXT: shrq $32, %rcx 194; AVX2-NEXT: shrq $48, %rdx 195; AVX2-NEXT: movswl %dx, %edx 196; AVX2-NEXT: vmovd %edx, %xmm0 197; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 198; AVX2-NEXT: movswl %cx, %ecx 199; AVX2-NEXT: vmovd %ecx, %xmm1 200; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 201; AVX2-NEXT: cwtl 202; AVX2-NEXT: vmovd %eax, %xmm2 203; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 204; AVX2-NEXT: vmovd %esi, %xmm3 205; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 206; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 207; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 208; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 209; AVX2-NEXT: retq 210; 
211; AVX512F-LABEL: cvt_8i16_to_4f32: 212; AVX512F: # BB#0: 213; AVX512F-NEXT: vmovq %xmm0, %rax 214; AVX512F-NEXT: movq %rax, %rcx 215; AVX512F-NEXT: movq %rax, %rdx 216; AVX512F-NEXT: movswl %ax, %esi 217; AVX512F-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 218; AVX512F-NEXT: shrl $16, %eax 219; AVX512F-NEXT: shrq $32, %rcx 220; AVX512F-NEXT: shrq $48, %rdx 221; AVX512F-NEXT: movswl %dx, %edx 222; AVX512F-NEXT: vmovd %edx, %xmm0 223; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 224; AVX512F-NEXT: movswl %cx, %ecx 225; AVX512F-NEXT: vmovd %ecx, %xmm1 226; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 227; AVX512F-NEXT: cwtl 228; AVX512F-NEXT: vmovd %eax, %xmm2 229; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm2 230; AVX512F-NEXT: vmovd %esi, %xmm3 231; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 232; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 233; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 234; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 235; AVX512F-NEXT: retq 236; 237; AVX512VL-LABEL: cvt_8i16_to_4f32: 238; AVX512VL: # BB#0: 239; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 240; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp) 241; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax 242; AVX512VL-NEXT: movq %rax, %rcx 243; AVX512VL-NEXT: movq %rax, %rdx 244; AVX512VL-NEXT: movswl %ax, %esi 245; AVX512VL-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 246; AVX512VL-NEXT: shrl $16, %eax 247; AVX512VL-NEXT: shrq $32, %rcx 248; AVX512VL-NEXT: shrq $48, %rdx 249; AVX512VL-NEXT: movswl %dx, %edx 250; AVX512VL-NEXT: vmovd %edx, %xmm0 251; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 252; AVX512VL-NEXT: movswl %cx, %ecx 253; AVX512VL-NEXT: vmovd %ecx, %xmm1 254; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 255; AVX512VL-NEXT: cwtl 256; AVX512VL-NEXT: vmovd %eax, %xmm2 257; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 258; AVX512VL-NEXT: vmovd %esi, %xmm3 259; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 260; AVX512VL-NEXT: 
vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 261; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 262; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 263; AVX512VL-NEXT: retq 264 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 265 %2 = bitcast <4 x i16> %1 to <4 x half> 266 %3 = fpext <4 x half> %2 to <4 x float> 267 ret <4 x float> %3 268} 269 270define <8 x float> @cvt_8i16_to_8f32(<8 x i16> %a0) nounwind { 271; AVX1-LABEL: cvt_8i16_to_8f32: 272; AVX1: # BB#0: 273; AVX1-NEXT: vpextrq $1, %xmm0, %rdx 274; AVX1-NEXT: movq %rdx, %r8 275; AVX1-NEXT: movq %rdx, %r10 276; AVX1-NEXT: movswl %dx, %r9d 277; AVX1-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<kill> 278; AVX1-NEXT: shrl $16, %edx 279; AVX1-NEXT: shrq $32, %r8 280; AVX1-NEXT: shrq $48, %r10 281; AVX1-NEXT: vmovq %xmm0, %rdi 282; AVX1-NEXT: movq %rdi, %rax 283; AVX1-NEXT: movq %rdi, %rsi 284; AVX1-NEXT: movswl %di, %ecx 285; AVX1-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill> 286; AVX1-NEXT: shrl $16, %edi 287; AVX1-NEXT: shrq $32, %rax 288; AVX1-NEXT: shrq $48, %rsi 289; AVX1-NEXT: movswl %si, %esi 290; AVX1-NEXT: vmovd %esi, %xmm0 291; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 292; AVX1-NEXT: cwtl 293; AVX1-NEXT: vmovd %eax, %xmm1 294; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 295; AVX1-NEXT: movswl %di, %eax 296; AVX1-NEXT: vmovd %eax, %xmm2 297; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2 298; AVX1-NEXT: vmovd %ecx, %xmm3 299; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 300; AVX1-NEXT: movswl %r10w, %eax 301; AVX1-NEXT: vmovd %eax, %xmm4 302; AVX1-NEXT: vcvtph2ps %xmm4, %xmm4 303; AVX1-NEXT: movswl %r8w, %eax 304; AVX1-NEXT: vmovd %eax, %xmm5 305; AVX1-NEXT: vcvtph2ps %xmm5, %xmm5 306; AVX1-NEXT: movswl %dx, %eax 307; AVX1-NEXT: vmovd %eax, %xmm6 308; AVX1-NEXT: vcvtph2ps %xmm6, %xmm6 309; AVX1-NEXT: vmovd %r9d, %xmm7 310; AVX1-NEXT: vcvtph2ps %xmm7, %xmm7 311; AVX1-NEXT: vinsertps {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[2,3] 312; AVX1-NEXT: vinsertps {{.*#+}} xmm5 = 
xmm6[0,1],xmm5[0],xmm6[3] 313; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0,1,2],xmm4[0] 314; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 315; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 316; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 317; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 318; AVX1-NEXT: retq 319; 320; AVX2-LABEL: cvt_8i16_to_8f32: 321; AVX2: # BB#0: 322; AVX2-NEXT: vpextrq $1, %xmm0, %rdx 323; AVX2-NEXT: movq %rdx, %r8 324; AVX2-NEXT: movq %rdx, %r10 325; AVX2-NEXT: movswl %dx, %r9d 326; AVX2-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<kill> 327; AVX2-NEXT: shrl $16, %edx 328; AVX2-NEXT: shrq $32, %r8 329; AVX2-NEXT: shrq $48, %r10 330; AVX2-NEXT: vmovq %xmm0, %rdi 331; AVX2-NEXT: movq %rdi, %rax 332; AVX2-NEXT: movq %rdi, %rsi 333; AVX2-NEXT: movswl %di, %ecx 334; AVX2-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill> 335; AVX2-NEXT: shrl $16, %edi 336; AVX2-NEXT: shrq $32, %rax 337; AVX2-NEXT: shrq $48, %rsi 338; AVX2-NEXT: movswl %si, %esi 339; AVX2-NEXT: vmovd %esi, %xmm0 340; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 341; AVX2-NEXT: cwtl 342; AVX2-NEXT: vmovd %eax, %xmm1 343; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 344; AVX2-NEXT: movswl %di, %eax 345; AVX2-NEXT: vmovd %eax, %xmm2 346; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 347; AVX2-NEXT: vmovd %ecx, %xmm3 348; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 349; AVX2-NEXT: movswl %r10w, %eax 350; AVX2-NEXT: vmovd %eax, %xmm4 351; AVX2-NEXT: vcvtph2ps %xmm4, %xmm4 352; AVX2-NEXT: movswl %r8w, %eax 353; AVX2-NEXT: vmovd %eax, %xmm5 354; AVX2-NEXT: vcvtph2ps %xmm5, %xmm5 355; AVX2-NEXT: movswl %dx, %eax 356; AVX2-NEXT: vmovd %eax, %xmm6 357; AVX2-NEXT: vcvtph2ps %xmm6, %xmm6 358; AVX2-NEXT: vmovd %r9d, %xmm7 359; AVX2-NEXT: vcvtph2ps %xmm7, %xmm7 360; AVX2-NEXT: vinsertps {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[2,3] 361; AVX2-NEXT: vinsertps {{.*#+}} xmm5 = xmm6[0,1],xmm5[0],xmm6[3] 362; AVX2-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0,1,2],xmm4[0] 363; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = 
xmm3[0],xmm2[0],xmm3[2,3] 364; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 365; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 366; AVX2-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 367; AVX2-NEXT: retq 368; 369; AVX512F-LABEL: cvt_8i16_to_8f32: 370; AVX512F: # BB#0: 371; AVX512F-NEXT: vpextrq $1, %xmm0, %rdx 372; AVX512F-NEXT: movq %rdx, %r8 373; AVX512F-NEXT: movq %rdx, %r9 374; AVX512F-NEXT: movswl %dx, %r10d 375; AVX512F-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<kill> 376; AVX512F-NEXT: shrl $16, %edx 377; AVX512F-NEXT: shrq $32, %r8 378; AVX512F-NEXT: shrq $48, %r9 379; AVX512F-NEXT: vmovq %xmm0, %rdi 380; AVX512F-NEXT: movq %rdi, %rax 381; AVX512F-NEXT: movq %rdi, %rcx 382; AVX512F-NEXT: movswl %di, %esi 383; AVX512F-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill> 384; AVX512F-NEXT: shrl $16, %edi 385; AVX512F-NEXT: shrq $32, %rax 386; AVX512F-NEXT: shrq $48, %rcx 387; AVX512F-NEXT: movswl %cx, %ecx 388; AVX512F-NEXT: vmovd %ecx, %xmm0 389; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 390; AVX512F-NEXT: cwtl 391; AVX512F-NEXT: vmovd %eax, %xmm1 392; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 393; AVX512F-NEXT: movswl %di, %eax 394; AVX512F-NEXT: vmovd %eax, %xmm2 395; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm2 396; AVX512F-NEXT: vmovd %esi, %xmm3 397; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 398; AVX512F-NEXT: movswl %r9w, %eax 399; AVX512F-NEXT: vmovd %eax, %xmm4 400; AVX512F-NEXT: vcvtph2ps %ymm4, %zmm4 401; AVX512F-NEXT: movswl %r8w, %eax 402; AVX512F-NEXT: vmovd %eax, %xmm5 403; AVX512F-NEXT: vcvtph2ps %ymm5, %zmm5 404; AVX512F-NEXT: movswl %dx, %eax 405; AVX512F-NEXT: vmovd %eax, %xmm6 406; AVX512F-NEXT: vcvtph2ps %ymm6, %zmm6 407; AVX512F-NEXT: vmovd %r10d, %xmm7 408; AVX512F-NEXT: vcvtph2ps %ymm7, %zmm7 409; AVX512F-NEXT: vinsertps {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[2,3] 410; AVX512F-NEXT: vinsertps {{.*#+}} xmm5 = xmm6[0,1],xmm5[0],xmm6[3] 411; AVX512F-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0,1,2],xmm4[0] 412; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = 
xmm3[0],xmm2[0],xmm3[2,3] 413; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 414; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 415; AVX512F-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 416; AVX512F-NEXT: retq 417; 418; AVX512VL-LABEL: cvt_8i16_to_8f32: 419; AVX512VL: # BB#0: 420; AVX512VL-NEXT: vpextrq $1, %xmm0, %rdx 421; AVX512VL-NEXT: movq %rdx, %r8 422; AVX512VL-NEXT: movq %rdx, %r10 423; AVX512VL-NEXT: movswl %dx, %r9d 424; AVX512VL-NEXT: # kill: %EDX<def> %EDX<kill> %RDX<kill> 425; AVX512VL-NEXT: shrl $16, %edx 426; AVX512VL-NEXT: shrq $32, %r8 427; AVX512VL-NEXT: shrq $48, %r10 428; AVX512VL-NEXT: vmovq %xmm0, %rdi 429; AVX512VL-NEXT: movq %rdi, %rax 430; AVX512VL-NEXT: movq %rdi, %rsi 431; AVX512VL-NEXT: movswl %di, %ecx 432; AVX512VL-NEXT: # kill: %EDI<def> %EDI<kill> %RDI<kill> 433; AVX512VL-NEXT: shrl $16, %edi 434; AVX512VL-NEXT: shrq $32, %rax 435; AVX512VL-NEXT: shrq $48, %rsi 436; AVX512VL-NEXT: movswl %si, %esi 437; AVX512VL-NEXT: vmovd %esi, %xmm0 438; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 439; AVX512VL-NEXT: cwtl 440; AVX512VL-NEXT: vmovd %eax, %xmm1 441; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 442; AVX512VL-NEXT: movswl %di, %eax 443; AVX512VL-NEXT: vmovd %eax, %xmm2 444; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 445; AVX512VL-NEXT: vmovd %ecx, %xmm3 446; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 447; AVX512VL-NEXT: movswl %r10w, %eax 448; AVX512VL-NEXT: vmovd %eax, %xmm4 449; AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 450; AVX512VL-NEXT: movswl %r8w, %eax 451; AVX512VL-NEXT: vmovd %eax, %xmm5 452; AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 453; AVX512VL-NEXT: movswl %dx, %eax 454; AVX512VL-NEXT: vmovd %eax, %xmm6 455; AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 456; AVX512VL-NEXT: vmovd %r9d, %xmm7 457; AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 458; AVX512VL-NEXT: vinsertps {{.*#+}} xmm6 = xmm7[0],xmm6[0],xmm7[2,3] 459; AVX512VL-NEXT: vinsertps {{.*#+}} xmm5 = xmm6[0,1],xmm5[0],xmm6[3] 460; AVX512VL-NEXT: vinsertps {{.*#+}} xmm4 = 
xmm5[0,1,2],xmm4[0] 461; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 462; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 463; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 464; AVX512VL-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 465; AVX512VL-NEXT: retq 466 %1 = bitcast <8 x i16> %a0 to <8 x half> 467 %2 = fpext <8 x half> %1 to <8 x float> 468 ret <8 x float> %2 469} 470 471define <16 x float> @cvt_16i16_to_16f32(<16 x i16> %a0) nounwind { 472; AVX1-LABEL: cvt_16i16_to_16f32: 473; AVX1: # BB#0: 474; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4 475; AVX1-NEXT: vmovq %xmm4, %rax 476; AVX1-NEXT: movq %rax, %rcx 477; AVX1-NEXT: shrq $48, %rcx 478; AVX1-NEXT: movswl %cx, %ecx 479; AVX1-NEXT: vmovd %ecx, %xmm8 480; AVX1-NEXT: movq %rax, %rcx 481; AVX1-NEXT: shrq $32, %rcx 482; AVX1-NEXT: movswl %cx, %ecx 483; AVX1-NEXT: vmovd %ecx, %xmm9 484; AVX1-NEXT: movswl %ax, %ecx 485; AVX1-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 486; AVX1-NEXT: shrl $16, %eax 487; AVX1-NEXT: cwtl 488; AVX1-NEXT: vmovd %eax, %xmm10 489; AVX1-NEXT: vpextrq $1, %xmm4, %rax 490; AVX1-NEXT: vmovd %ecx, %xmm11 491; AVX1-NEXT: movq %rax, %rcx 492; AVX1-NEXT: shrq $48, %rcx 493; AVX1-NEXT: movswl %cx, %ecx 494; AVX1-NEXT: vmovd %ecx, %xmm12 495; AVX1-NEXT: movq %rax, %rcx 496; AVX1-NEXT: shrq $32, %rcx 497; AVX1-NEXT: movswl %cx, %ecx 498; AVX1-NEXT: vmovd %ecx, %xmm13 499; AVX1-NEXT: movswl %ax, %ecx 500; AVX1-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 501; AVX1-NEXT: shrl $16, %eax 502; AVX1-NEXT: cwtl 503; AVX1-NEXT: vmovd %eax, %xmm14 504; AVX1-NEXT: vmovq %xmm0, %rax 505; AVX1-NEXT: vmovd %ecx, %xmm15 506; AVX1-NEXT: movq %rax, %rcx 507; AVX1-NEXT: shrq $48, %rcx 508; AVX1-NEXT: movswl %cx, %ecx 509; AVX1-NEXT: vmovd %ecx, %xmm2 510; AVX1-NEXT: movq %rax, %rcx 511; AVX1-NEXT: shrq $32, %rcx 512; AVX1-NEXT: movswl %cx, %ecx 513; AVX1-NEXT: vmovd %ecx, %xmm3 514; AVX1-NEXT: movswl %ax, %ecx 515; AVX1-NEXT: # kill: %EAX<def> %EAX<kill> 
%RAX<kill> 516; AVX1-NEXT: shrl $16, %eax 517; AVX1-NEXT: cwtl 518; AVX1-NEXT: vmovd %eax, %xmm4 519; AVX1-NEXT: vpextrq $1, %xmm0, %rax 520; AVX1-NEXT: vmovd %ecx, %xmm0 521; AVX1-NEXT: movq %rax, %rcx 522; AVX1-NEXT: shrq $48, %rcx 523; AVX1-NEXT: movswl %cx, %ecx 524; AVX1-NEXT: vmovd %ecx, %xmm5 525; AVX1-NEXT: movq %rax, %rcx 526; AVX1-NEXT: shrq $32, %rcx 527; AVX1-NEXT: movswl %cx, %ecx 528; AVX1-NEXT: vmovd %ecx, %xmm6 529; AVX1-NEXT: movl %eax, %ecx 530; AVX1-NEXT: shrl $16, %ecx 531; AVX1-NEXT: movswl %cx, %ecx 532; AVX1-NEXT: vmovd %ecx, %xmm7 533; AVX1-NEXT: cwtl 534; AVX1-NEXT: vmovd %eax, %xmm1 535; AVX1-NEXT: vcvtph2ps %xmm8, %xmm8 536; AVX1-NEXT: vcvtph2ps %xmm9, %xmm9 537; AVX1-NEXT: vcvtph2ps %xmm10, %xmm10 538; AVX1-NEXT: vcvtph2ps %xmm11, %xmm11 539; AVX1-NEXT: vcvtph2ps %xmm12, %xmm12 540; AVX1-NEXT: vcvtph2ps %xmm13, %xmm13 541; AVX1-NEXT: vcvtph2ps %xmm14, %xmm14 542; AVX1-NEXT: vcvtph2ps %xmm15, %xmm15 543; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2 544; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 545; AVX1-NEXT: vcvtph2ps %xmm4, %xmm4 546; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 547; AVX1-NEXT: vcvtph2ps %xmm5, %xmm5 548; AVX1-NEXT: vcvtph2ps %xmm6, %xmm6 549; AVX1-NEXT: vcvtph2ps %xmm7, %xmm7 550; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 551; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[2,3] 552; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm6[0],xmm1[3] 553; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm5[0] 554; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[2,3] 555; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3] 556; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm2[0] 557; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 558; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm15[0],xmm14[0],xmm15[2,3] 559; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm13[0],xmm1[3] 560; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm12[0] 561; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm11[0],xmm10[0],xmm11[2,3] 562; AVX1-NEXT: vinsertps 
{{.*#+}} xmm2 = xmm2[0,1],xmm9[0],xmm2[3] 563; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm8[0] 564; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 565; AVX1-NEXT: retq 566; 567; AVX2-LABEL: cvt_16i16_to_16f32: 568; AVX2: # BB#0: 569; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm4 570; AVX2-NEXT: vmovq %xmm4, %rax 571; AVX2-NEXT: movq %rax, %rcx 572; AVX2-NEXT: shrq $48, %rcx 573; AVX2-NEXT: movswl %cx, %ecx 574; AVX2-NEXT: vmovd %ecx, %xmm8 575; AVX2-NEXT: movq %rax, %rcx 576; AVX2-NEXT: shrq $32, %rcx 577; AVX2-NEXT: movswl %cx, %ecx 578; AVX2-NEXT: vmovd %ecx, %xmm9 579; AVX2-NEXT: movswl %ax, %ecx 580; AVX2-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 581; AVX2-NEXT: shrl $16, %eax 582; AVX2-NEXT: cwtl 583; AVX2-NEXT: vmovd %eax, %xmm10 584; AVX2-NEXT: vpextrq $1, %xmm4, %rax 585; AVX2-NEXT: vmovd %ecx, %xmm11 586; AVX2-NEXT: movq %rax, %rcx 587; AVX2-NEXT: shrq $48, %rcx 588; AVX2-NEXT: movswl %cx, %ecx 589; AVX2-NEXT: vmovd %ecx, %xmm12 590; AVX2-NEXT: movq %rax, %rcx 591; AVX2-NEXT: shrq $32, %rcx 592; AVX2-NEXT: movswl %cx, %ecx 593; AVX2-NEXT: vmovd %ecx, %xmm13 594; AVX2-NEXT: movswl %ax, %ecx 595; AVX2-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 596; AVX2-NEXT: shrl $16, %eax 597; AVX2-NEXT: cwtl 598; AVX2-NEXT: vmovd %eax, %xmm14 599; AVX2-NEXT: vmovq %xmm0, %rax 600; AVX2-NEXT: vmovd %ecx, %xmm15 601; AVX2-NEXT: movq %rax, %rcx 602; AVX2-NEXT: shrq $48, %rcx 603; AVX2-NEXT: movswl %cx, %ecx 604; AVX2-NEXT: vmovd %ecx, %xmm2 605; AVX2-NEXT: movq %rax, %rcx 606; AVX2-NEXT: shrq $32, %rcx 607; AVX2-NEXT: movswl %cx, %ecx 608; AVX2-NEXT: vmovd %ecx, %xmm3 609; AVX2-NEXT: movswl %ax, %ecx 610; AVX2-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 611; AVX2-NEXT: shrl $16, %eax 612; AVX2-NEXT: cwtl 613; AVX2-NEXT: vmovd %eax, %xmm4 614; AVX2-NEXT: vpextrq $1, %xmm0, %rax 615; AVX2-NEXT: vmovd %ecx, %xmm0 616; AVX2-NEXT: movq %rax, %rcx 617; AVX2-NEXT: shrq $48, %rcx 618; AVX2-NEXT: movswl %cx, %ecx 619; AVX2-NEXT: vmovd %ecx, %xmm5 620; AVX2-NEXT: movq 
%rax, %rcx 621; AVX2-NEXT: shrq $32, %rcx 622; AVX2-NEXT: movswl %cx, %ecx 623; AVX2-NEXT: vmovd %ecx, %xmm6 624; AVX2-NEXT: movl %eax, %ecx 625; AVX2-NEXT: shrl $16, %ecx 626; AVX2-NEXT: movswl %cx, %ecx 627; AVX2-NEXT: vmovd %ecx, %xmm7 628; AVX2-NEXT: cwtl 629; AVX2-NEXT: vmovd %eax, %xmm1 630; AVX2-NEXT: vcvtph2ps %xmm8, %xmm8 631; AVX2-NEXT: vcvtph2ps %xmm9, %xmm9 632; AVX2-NEXT: vcvtph2ps %xmm10, %xmm10 633; AVX2-NEXT: vcvtph2ps %xmm11, %xmm11 634; AVX2-NEXT: vcvtph2ps %xmm12, %xmm12 635; AVX2-NEXT: vcvtph2ps %xmm13, %xmm13 636; AVX2-NEXT: vcvtph2ps %xmm14, %xmm14 637; AVX2-NEXT: vcvtph2ps %xmm15, %xmm15 638; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 639; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 640; AVX2-NEXT: vcvtph2ps %xmm4, %xmm4 641; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 642; AVX2-NEXT: vcvtph2ps %xmm5, %xmm5 643; AVX2-NEXT: vcvtph2ps %xmm6, %xmm6 644; AVX2-NEXT: vcvtph2ps %xmm7, %xmm7 645; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 646; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm7[0],xmm1[2,3] 647; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm6[0],xmm1[3] 648; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm5[0] 649; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[2,3] 650; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3] 651; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm2[0] 652; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 653; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm15[0],xmm14[0],xmm15[2,3] 654; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm13[0],xmm1[3] 655; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm12[0] 656; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm11[0],xmm10[0],xmm11[2,3] 657; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm9[0],xmm2[3] 658; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm8[0] 659; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 660; AVX2-NEXT: retq 661; 662; AVX512F-LABEL: cvt_16i16_to_16f32: 663; AVX512F: # BB#0: 664; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm1 665; AVX512F-NEXT: vmovq 
%xmm0, %rax 666; AVX512F-NEXT: movq %rax, %rcx 667; AVX512F-NEXT: shrq $48, %rcx 668; AVX512F-NEXT: movswl %cx, %ecx 669; AVX512F-NEXT: vmovd %ecx, %xmm2 670; AVX512F-NEXT: movq %rax, %rcx 671; AVX512F-NEXT: shrq $32, %rcx 672; AVX512F-NEXT: movswl %cx, %ecx 673; AVX512F-NEXT: vmovd %ecx, %xmm3 674; AVX512F-NEXT: movswl %ax, %ecx 675; AVX512F-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 676; AVX512F-NEXT: shrl $16, %eax 677; AVX512F-NEXT: cwtl 678; AVX512F-NEXT: vmovd %eax, %xmm4 679; AVX512F-NEXT: vpextrq $1, %xmm0, %rax 680; AVX512F-NEXT: vmovd %ecx, %xmm0 681; AVX512F-NEXT: movq %rax, %rcx 682; AVX512F-NEXT: shrq $48, %rcx 683; AVX512F-NEXT: movswl %cx, %ecx 684; AVX512F-NEXT: vmovd %ecx, %xmm5 685; AVX512F-NEXT: movq %rax, %rcx 686; AVX512F-NEXT: shrq $32, %rcx 687; AVX512F-NEXT: movswl %cx, %ecx 688; AVX512F-NEXT: vmovd %ecx, %xmm6 689; AVX512F-NEXT: movswl %ax, %ecx 690; AVX512F-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 691; AVX512F-NEXT: shrl $16, %eax 692; AVX512F-NEXT: cwtl 693; AVX512F-NEXT: vmovd %eax, %xmm7 694; AVX512F-NEXT: vmovq %xmm1, %rax 695; AVX512F-NEXT: vmovd %ecx, %xmm8 696; AVX512F-NEXT: movq %rax, %rcx 697; AVX512F-NEXT: shrq $48, %rcx 698; AVX512F-NEXT: movswl %cx, %ecx 699; AVX512F-NEXT: vmovd %ecx, %xmm9 700; AVX512F-NEXT: movq %rax, %rcx 701; AVX512F-NEXT: shrq $32, %rcx 702; AVX512F-NEXT: movswl %cx, %ecx 703; AVX512F-NEXT: vmovd %ecx, %xmm10 704; AVX512F-NEXT: movswl %ax, %ecx 705; AVX512F-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 706; AVX512F-NEXT: shrl $16, %eax 707; AVX512F-NEXT: cwtl 708; AVX512F-NEXT: vmovd %eax, %xmm11 709; AVX512F-NEXT: vpextrq $1, %xmm1, %rax 710; AVX512F-NEXT: vmovd %ecx, %xmm1 711; AVX512F-NEXT: movq %rax, %rcx 712; AVX512F-NEXT: shrq $48, %rcx 713; AVX512F-NEXT: movswl %cx, %ecx 714; AVX512F-NEXT: vmovd %ecx, %xmm12 715; AVX512F-NEXT: movq %rax, %rcx 716; AVX512F-NEXT: shrq $32, %rcx 717; AVX512F-NEXT: movswl %cx, %ecx 718; AVX512F-NEXT: vmovd %ecx, %xmm13 719; AVX512F-NEXT: movl %eax, %ecx 720; 
AVX512F-NEXT: shrl $16, %ecx 721; AVX512F-NEXT: movswl %cx, %ecx 722; AVX512F-NEXT: vmovd %ecx, %xmm14 723; AVX512F-NEXT: cwtl 724; AVX512F-NEXT: vmovd %eax, %xmm15 725; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm16 726; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 727; AVX512F-NEXT: vcvtph2ps %ymm4, %zmm4 728; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 729; AVX512F-NEXT: vcvtph2ps %ymm5, %zmm5 730; AVX512F-NEXT: vcvtph2ps %ymm6, %zmm6 731; AVX512F-NEXT: vcvtph2ps %ymm7, %zmm7 732; AVX512F-NEXT: vcvtph2ps %ymm8, %zmm8 733; AVX512F-NEXT: vcvtph2ps %ymm9, %zmm9 734; AVX512F-NEXT: vcvtph2ps %ymm10, %zmm10 735; AVX512F-NEXT: vcvtph2ps %ymm11, %zmm11 736; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 737; AVX512F-NEXT: vcvtph2ps %ymm12, %zmm12 738; AVX512F-NEXT: vcvtph2ps %ymm13, %zmm13 739; AVX512F-NEXT: vcvtph2ps %ymm14, %zmm14 740; AVX512F-NEXT: vcvtph2ps %ymm15, %zmm15 741; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm15[0],xmm14[0],xmm15[2,3] 742; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm13[0],xmm2[3] 743; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm12[0] 744; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0],xmm11[0],xmm1[2,3] 745; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm10[0],xmm1[3] 746; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm9[0] 747; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 748; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm8[0],xmm7[0],xmm8[2,3] 749; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm6[0],xmm2[3] 750; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm5[0] 751; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[2,3] 752; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm3[0],xmm0[3] 753; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm16[0] 754; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 755; AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 756; AVX512F-NEXT: retq 757; 758; AVX512VL-LABEL: cvt_16i16_to_16f32: 759; AVX512VL: # BB#0: 760; AVX512VL-NEXT: vextracti128 $1, %ymm0, %xmm10 
; Review note - everything below is autogenerated by utils/update_llc_test_checks.py; regenerate with that script instead of editing check lines by hand.
761; AVX512VL-NEXT: vmovq %xmm0, %rax 762; AVX512VL-NEXT: movq %rax, %rcx 763; AVX512VL-NEXT: shrq $48, %rcx 764; AVX512VL-NEXT: movswl %cx, %ecx 765; AVX512VL-NEXT: vmovd %ecx, %xmm8 766; AVX512VL-NEXT: movq %rax, %rcx 767; AVX512VL-NEXT: shrq $32, %rcx 768; AVX512VL-NEXT: movswl %cx, %ecx 769; AVX512VL-NEXT: vmovd %ecx, %xmm9 770; AVX512VL-NEXT: movswl %ax, %ecx 771; AVX512VL-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 772; AVX512VL-NEXT: shrl $16, %eax 773; AVX512VL-NEXT: cwtl 774; AVX512VL-NEXT: vmovd %eax, %xmm11 775; AVX512VL-NEXT: vpextrq $1, %xmm0, %rax 776; AVX512VL-NEXT: vmovd %ecx, %xmm12 777; AVX512VL-NEXT: movq %rax, %rcx 778; AVX512VL-NEXT: shrq $48, %rcx 779; AVX512VL-NEXT: movswl %cx, %ecx 780; AVX512VL-NEXT: vmovd %ecx, %xmm13 781; AVX512VL-NEXT: movq %rax, %rcx 782; AVX512VL-NEXT: shrq $32, %rcx 783; AVX512VL-NEXT: movswl %cx, %ecx 784; AVX512VL-NEXT: vmovd %ecx, %xmm14 785; AVX512VL-NEXT: movswl %ax, %ecx 786; AVX512VL-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 787; AVX512VL-NEXT: shrl $16, %eax 788; AVX512VL-NEXT: cwtl 789; AVX512VL-NEXT: vmovd %eax, %xmm15 790; AVX512VL-NEXT: vmovq %xmm10, %rax 791; AVX512VL-NEXT: vmovd %ecx, %xmm16 792; AVX512VL-NEXT: movq %rax, %rcx 793; AVX512VL-NEXT: shrq $48, %rcx 794; AVX512VL-NEXT: movswl %cx, %ecx 795; AVX512VL-NEXT: vmovd %ecx, %xmm17 796; AVX512VL-NEXT: movq %rax, %rcx 797; AVX512VL-NEXT: shrq $32, %rcx 798; AVX512VL-NEXT: movswl %cx, %ecx 799; AVX512VL-NEXT: vmovd %ecx, %xmm18 800; AVX512VL-NEXT: movswl %ax, %ecx 801; AVX512VL-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 802; AVX512VL-NEXT: shrl $16, %eax 803; AVX512VL-NEXT: cwtl 804; AVX512VL-NEXT: vmovd %eax, %xmm19 805; AVX512VL-NEXT: vpextrq $1, %xmm10, %rax 806; AVX512VL-NEXT: vmovd %ecx, %xmm10 807; AVX512VL-NEXT: movq %rax, %rcx 808; AVX512VL-NEXT: shrq $48, %rcx 809; AVX512VL-NEXT: movswl %cx, %ecx 810; AVX512VL-NEXT: vmovd %ecx, %xmm20 811; AVX512VL-NEXT: movq %rax, %rcx 812; AVX512VL-NEXT: shrq $32, %rcx 813; AVX512VL-NEXT: movswl 
%cx, %ecx 814; AVX512VL-NEXT: vmovd %ecx, %xmm21 815; AVX512VL-NEXT: movl %eax, %ecx 816; AVX512VL-NEXT: shrl $16, %ecx 817; AVX512VL-NEXT: movswl %cx, %ecx 818; AVX512VL-NEXT: vmovd %ecx, %xmm22 819; AVX512VL-NEXT: cwtl 820; AVX512VL-NEXT: vmovd %eax, %xmm2 821; AVX512VL-NEXT: vcvtph2ps %xmm8, %xmm8 822; AVX512VL-NEXT: vcvtph2ps %xmm9, %xmm9 823; AVX512VL-NEXT: vcvtph2ps %xmm11, %xmm11 824; AVX512VL-NEXT: vcvtph2ps %xmm12, %xmm12 825; AVX512VL-NEXT: vcvtph2ps %xmm13, %xmm13 826; AVX512VL-NEXT: vcvtph2ps %xmm14, %xmm14 827; AVX512VL-NEXT: vcvtph2ps %xmm15, %xmm15 828; AVX512VL-NEXT: vcvtph2ps %xmm16, %xmm16 829; AVX512VL-NEXT: vcvtph2ps %xmm17, %xmm4 830; AVX512VL-NEXT: vcvtph2ps %xmm18, %xmm0 831; AVX512VL-NEXT: vcvtph2ps %xmm19, %xmm5 832; AVX512VL-NEXT: vcvtph2ps %xmm10, %xmm7 833; AVX512VL-NEXT: vcvtph2ps %xmm20, %xmm3 834; AVX512VL-NEXT: vcvtph2ps %xmm21, %xmm6 835; AVX512VL-NEXT: vcvtph2ps %xmm22, %xmm1 836; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 837; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0],xmm1[0],xmm2[2,3] 838; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm6[0],xmm1[3] 839; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm3[0] 840; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm7[0],xmm5[0],xmm7[2,3] 841; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1],xmm0[0],xmm2[3] 842; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm4[0] 843; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 844; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm16[0],xmm15[0],xmm16[2,3] 845; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm14[0],xmm1[3] 846; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm13[0] 847; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm12[0],xmm11[0],xmm12[2,3] 848; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm9[0],xmm2[3] 849; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm8[0] 850; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 851; AVX512VL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 852; 
AVX512VL-NEXT: retq 853 %1 = bitcast <16 x i16> %a0 to <16 x half> 854 %2 = fpext <16 x half> %1 to <16 x float> 855 ret <16 x float> %2 856} 857 858; 859; Half to Float (Load) 860; 861 862define float @load_cvt_i16_to_f32(i16* %a0) nounwind { 863; AVX1-LABEL: load_cvt_i16_to_f32: 864; AVX1: # BB#0: 865; AVX1-NEXT: movswl (%rdi), %eax 866; AVX1-NEXT: vmovd %eax, %xmm0 867; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 868; AVX1-NEXT: retq 869; 870; AVX2-LABEL: load_cvt_i16_to_f32: 871; AVX2: # BB#0: 872; AVX2-NEXT: movswl (%rdi), %eax 873; AVX2-NEXT: vmovd %eax, %xmm0 874; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 875; AVX2-NEXT: retq 876; 877; AVX512F-LABEL: load_cvt_i16_to_f32: 878; AVX512F: # BB#0: 879; AVX512F-NEXT: movswl (%rdi), %eax 880; AVX512F-NEXT: vmovd %eax, %xmm0 881; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 882; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> 883; AVX512F-NEXT: retq 884; 885; AVX512VL-LABEL: load_cvt_i16_to_f32: 886; AVX512VL: # BB#0: 887; AVX512VL-NEXT: movswl (%rdi), %eax 888; AVX512VL-NEXT: vmovd %eax, %xmm0 889; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 890; AVX512VL-NEXT: retq 891 %1 = load i16, i16* %a0 892 %2 = bitcast i16 %1 to half 893 %3 = fpext half %2 to float 894 ret float %3 895} 896 897define <4 x float> @load_cvt_4i16_to_4f32(<4 x i16>* %a0) nounwind { 898; AVX1-LABEL: load_cvt_4i16_to_4f32: 899; AVX1: # BB#0: 900; AVX1-NEXT: movswl 6(%rdi), %eax 901; AVX1-NEXT: vmovd %eax, %xmm0 902; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 903; AVX1-NEXT: movswl 4(%rdi), %eax 904; AVX1-NEXT: vmovd %eax, %xmm1 905; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 906; AVX1-NEXT: movswl (%rdi), %eax 907; AVX1-NEXT: vmovd %eax, %xmm2 908; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2 909; AVX1-NEXT: movswl 2(%rdi), %eax 910; AVX1-NEXT: vmovd %eax, %xmm3 911; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 912; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 913; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 914; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 
; Review note - autogenerated check lines (load_cvt_4i16_to_4f32 continued); do not edit by hand.
915; AVX1-NEXT: retq 916; 917; AVX2-LABEL: load_cvt_4i16_to_4f32: 918; AVX2: # BB#0: 919; AVX2-NEXT: movswl 6(%rdi), %eax 920; AVX2-NEXT: vmovd %eax, %xmm0 921; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 922; AVX2-NEXT: movswl 4(%rdi), %eax 923; AVX2-NEXT: vmovd %eax, %xmm1 924; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 925; AVX2-NEXT: movswl (%rdi), %eax 926; AVX2-NEXT: vmovd %eax, %xmm2 927; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 928; AVX2-NEXT: movswl 2(%rdi), %eax 929; AVX2-NEXT: vmovd %eax, %xmm3 930; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 931; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 932; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 933; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 934; AVX2-NEXT: retq 935; 936; AVX512F-LABEL: load_cvt_4i16_to_4f32: 937; AVX512F: # BB#0: 938; AVX512F-NEXT: movswl 6(%rdi), %eax 939; AVX512F-NEXT: vmovd %eax, %xmm0 940; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 941; AVX512F-NEXT: movswl 4(%rdi), %eax 942; AVX512F-NEXT: vmovd %eax, %xmm1 943; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 944; AVX512F-NEXT: movswl (%rdi), %eax 945; AVX512F-NEXT: vmovd %eax, %xmm2 946; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm2 947; AVX512F-NEXT: movswl 2(%rdi), %eax 948; AVX512F-NEXT: vmovd %eax, %xmm3 949; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 950; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 951; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 952; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 953; AVX512F-NEXT: retq 954; 955; AVX512VL-LABEL: load_cvt_4i16_to_4f32: 956; AVX512VL: # BB#0: 957; AVX512VL-NEXT: movswl 6(%rdi), %eax 958; AVX512VL-NEXT: vmovd %eax, %xmm0 959; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 960; AVX512VL-NEXT: movswl 4(%rdi), %eax 961; AVX512VL-NEXT: vmovd %eax, %xmm1 962; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 963; AVX512VL-NEXT: movswl (%rdi), %eax 964; AVX512VL-NEXT: vmovd %eax, %xmm2 965; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 966; AVX512VL-NEXT: movswl 2(%rdi), %eax 
; Review note - autogenerated check lines for load_cvt_4i16_to_4f32 (end) and load_cvt_8i16_to_4f32; regenerate via utils/update_llc_test_checks.py.
967; AVX512VL-NEXT: vmovd %eax, %xmm3 968; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 969; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 970; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 971; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 972; AVX512VL-NEXT: retq 973 %1 = load <4 x i16>, <4 x i16>* %a0 974 %2 = bitcast <4 x i16> %1 to <4 x half> 975 %3 = fpext <4 x half> %2 to <4 x float> 976 ret <4 x float> %3 977} 978 979define <4 x float> @load_cvt_8i16_to_4f32(<8 x i16>* %a0) nounwind { 980; AVX1-LABEL: load_cvt_8i16_to_4f32: 981; AVX1: # BB#0: 982; AVX1-NEXT: movq (%rdi), %rax 983; AVX1-NEXT: movq %rax, %rcx 984; AVX1-NEXT: movq %rax, %rdx 985; AVX1-NEXT: movswl %ax, %esi 986; AVX1-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 987; AVX1-NEXT: shrl $16, %eax 988; AVX1-NEXT: shrq $32, %rcx 989; AVX1-NEXT: shrq $48, %rdx 990; AVX1-NEXT: movswl %dx, %edx 991; AVX1-NEXT: vmovd %edx, %xmm0 992; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 993; AVX1-NEXT: movswl %cx, %ecx 994; AVX1-NEXT: vmovd %ecx, %xmm1 995; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 996; AVX1-NEXT: cwtl 997; AVX1-NEXT: vmovd %eax, %xmm2 998; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2 999; AVX1-NEXT: vmovd %esi, %xmm3 1000; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 1001; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 1002; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 1003; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1004; AVX1-NEXT: retq 1005; 1006; AVX2-LABEL: load_cvt_8i16_to_4f32: 1007; AVX2: # BB#0: 1008; AVX2-NEXT: movq (%rdi), %rax 1009; AVX2-NEXT: movq %rax, %rcx 1010; AVX2-NEXT: movq %rax, %rdx 1011; AVX2-NEXT: movswl %ax, %esi 1012; AVX2-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 1013; AVX2-NEXT: shrl $16, %eax 1014; AVX2-NEXT: shrq $32, %rcx 1015; AVX2-NEXT: shrq $48, %rdx 1016; AVX2-NEXT: movswl %dx, %edx 1017; AVX2-NEXT: vmovd %edx, %xmm0 1018; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 1019; AVX2-NEXT: movswl %cx, %ecx 1020; 
AVX2-NEXT: vmovd %ecx, %xmm1 1021; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 1022; AVX2-NEXT: cwtl 1023; AVX2-NEXT: vmovd %eax, %xmm2 1024; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 1025; AVX2-NEXT: vmovd %esi, %xmm3 1026; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 1027; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 1028; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 1029; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1030; AVX2-NEXT: retq 1031; 1032; AVX512F-LABEL: load_cvt_8i16_to_4f32: 1033; AVX512F: # BB#0: 1034; AVX512F-NEXT: movq (%rdi), %rax 1035; AVX512F-NEXT: movq %rax, %rcx 1036; AVX512F-NEXT: movq %rax, %rdx 1037; AVX512F-NEXT: movswl %ax, %esi 1038; AVX512F-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 1039; AVX512F-NEXT: shrl $16, %eax 1040; AVX512F-NEXT: shrq $32, %rcx 1041; AVX512F-NEXT: shrq $48, %rdx 1042; AVX512F-NEXT: movswl %dx, %edx 1043; AVX512F-NEXT: vmovd %edx, %xmm0 1044; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 1045; AVX512F-NEXT: movswl %cx, %ecx 1046; AVX512F-NEXT: vmovd %ecx, %xmm1 1047; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 1048; AVX512F-NEXT: cwtl 1049; AVX512F-NEXT: vmovd %eax, %xmm2 1050; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm2 1051; AVX512F-NEXT: vmovd %esi, %xmm3 1052; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 1053; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 1054; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 1055; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1056; AVX512F-NEXT: retq 1057; 1058; AVX512VL-LABEL: load_cvt_8i16_to_4f32: 1059; AVX512VL: # BB#0: 1060; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1061; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp) 1062; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1063; AVX512VL-NEXT: movq %rax, %rcx 1064; AVX512VL-NEXT: movq %rax, %rdx 1065; AVX512VL-NEXT: movswl %ax, %esi 1066; AVX512VL-NEXT: # kill: %EAX<def> %EAX<kill> %RAX<kill> 1067; AVX512VL-NEXT: shrl $16, %eax 1068; 
AVX512VL-NEXT: shrq $32, %rcx 1069; AVX512VL-NEXT: shrq $48, %rdx 1070; AVX512VL-NEXT: movswl %dx, %edx 1071; AVX512VL-NEXT: vmovd %edx, %xmm0 1072; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 1073; AVX512VL-NEXT: movswl %cx, %ecx 1074; AVX512VL-NEXT: vmovd %ecx, %xmm1 1075; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 1076; AVX512VL-NEXT: cwtl 1077; AVX512VL-NEXT: vmovd %eax, %xmm2 1078; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 1079; AVX512VL-NEXT: vmovd %esi, %xmm3 1080; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 1081; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0],xmm2[0],xmm3[2,3] 1082; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 1083; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1084; AVX512VL-NEXT: retq 1085 %1 = load <8 x i16>, <8 x i16>* %a0 1086 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1087 %3 = bitcast <4 x i16> %2 to <4 x half> 1088 %4 = fpext <4 x half> %3 to <4 x float> 1089 ret <4 x float> %4 1090} 1091 1092define <8 x float> @load_cvt_8i16_to_8f32(<8 x i16>* %a0) nounwind { 1093; AVX1-LABEL: load_cvt_8i16_to_8f32: 1094; AVX1: # BB#0: 1095; AVX1-NEXT: movswl 6(%rdi), %eax 1096; AVX1-NEXT: vmovd %eax, %xmm0 1097; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 1098; AVX1-NEXT: movswl 4(%rdi), %eax 1099; AVX1-NEXT: vmovd %eax, %xmm1 1100; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 1101; AVX1-NEXT: movswl (%rdi), %eax 1102; AVX1-NEXT: vmovd %eax, %xmm2 1103; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2 1104; AVX1-NEXT: movswl 2(%rdi), %eax 1105; AVX1-NEXT: vmovd %eax, %xmm3 1106; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 1107; AVX1-NEXT: movswl 14(%rdi), %eax 1108; AVX1-NEXT: vmovd %eax, %xmm4 1109; AVX1-NEXT: vcvtph2ps %xmm4, %xmm4 1110; AVX1-NEXT: movswl 12(%rdi), %eax 1111; AVX1-NEXT: vmovd %eax, %xmm5 1112; AVX1-NEXT: vcvtph2ps %xmm5, %xmm5 1113; AVX1-NEXT: movswl 8(%rdi), %eax 1114; AVX1-NEXT: vmovd %eax, %xmm6 1115; AVX1-NEXT: vcvtph2ps %xmm6, %xmm6 1116; AVX1-NEXT: movswl 10(%rdi), %eax 1117; AVX1-NEXT: vmovd %eax, %xmm7 
; Review note - autogenerated check lines for load_cvt_8i16_to_8f32 and the start of load_cvt_16i16_to_16f32; regenerate via utils/update_llc_test_checks.py.
1118; AVX1-NEXT: vcvtph2ps %xmm7, %xmm7 1119; AVX1-NEXT: vinsertps {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[2,3] 1120; AVX1-NEXT: vinsertps {{.*#+}} xmm5 = xmm6[0,1],xmm5[0],xmm6[3] 1121; AVX1-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0,1,2],xmm4[0] 1122; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 1123; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 1124; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1125; AVX1-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 1126; AVX1-NEXT: retq 1127; 1128; AVX2-LABEL: load_cvt_8i16_to_8f32: 1129; AVX2: # BB#0: 1130; AVX2-NEXT: movswl 6(%rdi), %eax 1131; AVX2-NEXT: vmovd %eax, %xmm0 1132; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 1133; AVX2-NEXT: movswl 4(%rdi), %eax 1134; AVX2-NEXT: vmovd %eax, %xmm1 1135; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 1136; AVX2-NEXT: movswl (%rdi), %eax 1137; AVX2-NEXT: vmovd %eax, %xmm2 1138; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 1139; AVX2-NEXT: movswl 2(%rdi), %eax 1140; AVX2-NEXT: vmovd %eax, %xmm3 1141; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 1142; AVX2-NEXT: movswl 14(%rdi), %eax 1143; AVX2-NEXT: vmovd %eax, %xmm4 1144; AVX2-NEXT: vcvtph2ps %xmm4, %xmm4 1145; AVX2-NEXT: movswl 12(%rdi), %eax 1146; AVX2-NEXT: vmovd %eax, %xmm5 1147; AVX2-NEXT: vcvtph2ps %xmm5, %xmm5 1148; AVX2-NEXT: movswl 8(%rdi), %eax 1149; AVX2-NEXT: vmovd %eax, %xmm6 1150; AVX2-NEXT: vcvtph2ps %xmm6, %xmm6 1151; AVX2-NEXT: movswl 10(%rdi), %eax 1152; AVX2-NEXT: vmovd %eax, %xmm7 1153; AVX2-NEXT: vcvtph2ps %xmm7, %xmm7 1154; AVX2-NEXT: vinsertps {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[2,3] 1155; AVX2-NEXT: vinsertps {{.*#+}} xmm5 = xmm6[0,1],xmm5[0],xmm6[3] 1156; AVX2-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0,1,2],xmm4[0] 1157; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 1158; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 1159; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1160; AVX2-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 1161; AVX2-NEXT: retq 1162; 1163; AVX512F-LABEL: 
load_cvt_8i16_to_8f32: 1164; AVX512F: # BB#0: 1165; AVX512F-NEXT: movswl 6(%rdi), %eax 1166; AVX512F-NEXT: vmovd %eax, %xmm0 1167; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 1168; AVX512F-NEXT: movswl 4(%rdi), %eax 1169; AVX512F-NEXT: vmovd %eax, %xmm1 1170; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 1171; AVX512F-NEXT: movswl (%rdi), %eax 1172; AVX512F-NEXT: vmovd %eax, %xmm2 1173; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm2 1174; AVX512F-NEXT: movswl 2(%rdi), %eax 1175; AVX512F-NEXT: vmovd %eax, %xmm3 1176; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 1177; AVX512F-NEXT: movswl 14(%rdi), %eax 1178; AVX512F-NEXT: vmovd %eax, %xmm4 1179; AVX512F-NEXT: vcvtph2ps %ymm4, %zmm4 1180; AVX512F-NEXT: movswl 12(%rdi), %eax 1181; AVX512F-NEXT: vmovd %eax, %xmm5 1182; AVX512F-NEXT: vcvtph2ps %ymm5, %zmm5 1183; AVX512F-NEXT: movswl 8(%rdi), %eax 1184; AVX512F-NEXT: vmovd %eax, %xmm6 1185; AVX512F-NEXT: vcvtph2ps %ymm6, %zmm6 1186; AVX512F-NEXT: movswl 10(%rdi), %eax 1187; AVX512F-NEXT: vmovd %eax, %xmm7 1188; AVX512F-NEXT: vcvtph2ps %ymm7, %zmm7 1189; AVX512F-NEXT: vinsertps {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[2,3] 1190; AVX512F-NEXT: vinsertps {{.*#+}} xmm5 = xmm6[0,1],xmm5[0],xmm6[3] 1191; AVX512F-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0,1,2],xmm4[0] 1192; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 1193; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 1194; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1195; AVX512F-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 1196; AVX512F-NEXT: retq 1197; 1198; AVX512VL-LABEL: load_cvt_8i16_to_8f32: 1199; AVX512VL: # BB#0: 1200; AVX512VL-NEXT: movswl 6(%rdi), %eax 1201; AVX512VL-NEXT: vmovd %eax, %xmm0 1202; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 1203; AVX512VL-NEXT: movswl 4(%rdi), %eax 1204; AVX512VL-NEXT: vmovd %eax, %xmm1 1205; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 1206; AVX512VL-NEXT: movswl (%rdi), %eax 1207; AVX512VL-NEXT: vmovd %eax, %xmm2 1208; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 1209; AVX512VL-NEXT: 
movswl 2(%rdi), %eax 1210; AVX512VL-NEXT: vmovd %eax, %xmm3 1211; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 1212; AVX512VL-NEXT: movswl 14(%rdi), %eax 1213; AVX512VL-NEXT: vmovd %eax, %xmm4 1214; AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 1215; AVX512VL-NEXT: movswl 12(%rdi), %eax 1216; AVX512VL-NEXT: vmovd %eax, %xmm5 1217; AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 1218; AVX512VL-NEXT: movswl 8(%rdi), %eax 1219; AVX512VL-NEXT: vmovd %eax, %xmm6 1220; AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 1221; AVX512VL-NEXT: movswl 10(%rdi), %eax 1222; AVX512VL-NEXT: vmovd %eax, %xmm7 1223; AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 1224; AVX512VL-NEXT: vinsertps {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[2,3] 1225; AVX512VL-NEXT: vinsertps {{.*#+}} xmm5 = xmm6[0,1],xmm5[0],xmm6[3] 1226; AVX512VL-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0,1,2],xmm4[0] 1227; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 1228; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 1229; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1230; AVX512VL-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 1231; AVX512VL-NEXT: retq 1232 %1 = load <8 x i16>, <8 x i16>* %a0 1233 %2 = bitcast <8 x i16> %1 to <8 x half> 1234 %3 = fpext <8 x half> %2 to <8 x float> 1235 ret <8 x float> %3 1236} 1237 1238define <16 x float> @load_cvt_16i16_to_16f32(<16 x i16>* %a0) nounwind { 1239; AVX1-LABEL: load_cvt_16i16_to_16f32: 1240; AVX1: # BB#0: 1241; AVX1-NEXT: movswl 22(%rdi), %eax 1242; AVX1-NEXT: vmovd %eax, %xmm0 1243; AVX1-NEXT: vcvtph2ps %xmm0, %xmm8 1244; AVX1-NEXT: movswl 20(%rdi), %eax 1245; AVX1-NEXT: vmovd %eax, %xmm0 1246; AVX1-NEXT: vcvtph2ps %xmm0, %xmm9 1247; AVX1-NEXT: movswl 16(%rdi), %eax 1248; AVX1-NEXT: vmovd %eax, %xmm0 1249; AVX1-NEXT: vcvtph2ps %xmm0, %xmm10 1250; AVX1-NEXT: movswl 18(%rdi), %eax 1251; AVX1-NEXT: vmovd %eax, %xmm0 1252; AVX1-NEXT: vcvtph2ps %xmm0, %xmm11 1253; AVX1-NEXT: movswl 30(%rdi), %eax 1254; AVX1-NEXT: vmovd %eax, %xmm0 1255; AVX1-NEXT: vcvtph2ps %xmm0, %xmm12 
; Review note - autogenerated check lines (load_cvt_16i16_to_16f32 continued); regenerate via utils/update_llc_test_checks.py.
1256; AVX1-NEXT: movswl 28(%rdi), %eax 1257; AVX1-NEXT: vmovd %eax, %xmm0 1258; AVX1-NEXT: vcvtph2ps %xmm0, %xmm13 1259; AVX1-NEXT: movswl 24(%rdi), %eax 1260; AVX1-NEXT: vmovd %eax, %xmm0 1261; AVX1-NEXT: vcvtph2ps %xmm0, %xmm14 1262; AVX1-NEXT: movswl 26(%rdi), %eax 1263; AVX1-NEXT: vmovd %eax, %xmm0 1264; AVX1-NEXT: vcvtph2ps %xmm0, %xmm15 1265; AVX1-NEXT: movswl 6(%rdi), %eax 1266; AVX1-NEXT: vmovd %eax, %xmm0 1267; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 1268; AVX1-NEXT: movswl 4(%rdi), %eax 1269; AVX1-NEXT: vmovd %eax, %xmm2 1270; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2 1271; AVX1-NEXT: movswl (%rdi), %eax 1272; AVX1-NEXT: vmovd %eax, %xmm3 1273; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 1274; AVX1-NEXT: movswl 2(%rdi), %eax 1275; AVX1-NEXT: vmovd %eax, %xmm4 1276; AVX1-NEXT: vcvtph2ps %xmm4, %xmm4 1277; AVX1-NEXT: movswl 14(%rdi), %eax 1278; AVX1-NEXT: vmovd %eax, %xmm5 1279; AVX1-NEXT: vcvtph2ps %xmm5, %xmm5 1280; AVX1-NEXT: movswl 12(%rdi), %eax 1281; AVX1-NEXT: vmovd %eax, %xmm6 1282; AVX1-NEXT: vcvtph2ps %xmm6, %xmm6 1283; AVX1-NEXT: movswl 8(%rdi), %eax 1284; AVX1-NEXT: vmovd %eax, %xmm7 1285; AVX1-NEXT: vcvtph2ps %xmm7, %xmm7 1286; AVX1-NEXT: movswl 10(%rdi), %eax 1287; AVX1-NEXT: vmovd %eax, %xmm1 1288; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 1289; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm7[0],xmm1[0],xmm7[2,3] 1290; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm6[0],xmm1[3] 1291; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm5[0] 1292; AVX1-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3] 1293; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1],xmm2[0],xmm3[3] 1294; AVX1-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0] 1295; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1296; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm14[0],xmm15[0],xmm14[2,3] 1297; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm13[0],xmm1[3] 1298; AVX1-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm12[0] 1299; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm10[0],xmm11[0],xmm10[2,3] 1300; 
AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm9[0],xmm2[3] 1301; AVX1-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm8[0] 1302; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1303; AVX1-NEXT: retq 1304; 1305; AVX2-LABEL: load_cvt_16i16_to_16f32: 1306; AVX2: # BB#0: 1307; AVX2-NEXT: movswl 22(%rdi), %eax 1308; AVX2-NEXT: vmovd %eax, %xmm0 1309; AVX2-NEXT: vcvtph2ps %xmm0, %xmm8 1310; AVX2-NEXT: movswl 20(%rdi), %eax 1311; AVX2-NEXT: vmovd %eax, %xmm0 1312; AVX2-NEXT: vcvtph2ps %xmm0, %xmm9 1313; AVX2-NEXT: movswl 16(%rdi), %eax 1314; AVX2-NEXT: vmovd %eax, %xmm0 1315; AVX2-NEXT: vcvtph2ps %xmm0, %xmm10 1316; AVX2-NEXT: movswl 18(%rdi), %eax 1317; AVX2-NEXT: vmovd %eax, %xmm0 1318; AVX2-NEXT: vcvtph2ps %xmm0, %xmm11 1319; AVX2-NEXT: movswl 30(%rdi), %eax 1320; AVX2-NEXT: vmovd %eax, %xmm0 1321; AVX2-NEXT: vcvtph2ps %xmm0, %xmm12 1322; AVX2-NEXT: movswl 28(%rdi), %eax 1323; AVX2-NEXT: vmovd %eax, %xmm0 1324; AVX2-NEXT: vcvtph2ps %xmm0, %xmm13 1325; AVX2-NEXT: movswl 24(%rdi), %eax 1326; AVX2-NEXT: vmovd %eax, %xmm0 1327; AVX2-NEXT: vcvtph2ps %xmm0, %xmm14 1328; AVX2-NEXT: movswl 26(%rdi), %eax 1329; AVX2-NEXT: vmovd %eax, %xmm0 1330; AVX2-NEXT: vcvtph2ps %xmm0, %xmm15 1331; AVX2-NEXT: movswl 6(%rdi), %eax 1332; AVX2-NEXT: vmovd %eax, %xmm0 1333; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 1334; AVX2-NEXT: movswl 4(%rdi), %eax 1335; AVX2-NEXT: vmovd %eax, %xmm2 1336; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 1337; AVX2-NEXT: movswl (%rdi), %eax 1338; AVX2-NEXT: vmovd %eax, %xmm3 1339; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 1340; AVX2-NEXT: movswl 2(%rdi), %eax 1341; AVX2-NEXT: vmovd %eax, %xmm4 1342; AVX2-NEXT: vcvtph2ps %xmm4, %xmm4 1343; AVX2-NEXT: movswl 14(%rdi), %eax 1344; AVX2-NEXT: vmovd %eax, %xmm5 1345; AVX2-NEXT: vcvtph2ps %xmm5, %xmm5 1346; AVX2-NEXT: movswl 12(%rdi), %eax 1347; AVX2-NEXT: vmovd %eax, %xmm6 1348; AVX2-NEXT: vcvtph2ps %xmm6, %xmm6 1349; AVX2-NEXT: movswl 8(%rdi), %eax 1350; AVX2-NEXT: vmovd %eax, %xmm7 1351; AVX2-NEXT: vcvtph2ps %xmm7, %xmm7 1352; AVX2-NEXT: 
movswl 10(%rdi), %eax 1353; AVX2-NEXT: vmovd %eax, %xmm1 1354; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 1355; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm7[0],xmm1[0],xmm7[2,3] 1356; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm6[0],xmm1[3] 1357; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm5[0] 1358; AVX2-NEXT: vinsertps {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[2,3] 1359; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm3[0,1],xmm2[0],xmm3[3] 1360; AVX2-NEXT: vinsertps {{.*#+}} xmm0 = xmm2[0,1,2],xmm0[0] 1361; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1362; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm14[0],xmm15[0],xmm14[2,3] 1363; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm13[0],xmm1[3] 1364; AVX2-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm12[0] 1365; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm10[0],xmm11[0],xmm10[2,3] 1366; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm9[0],xmm2[3] 1367; AVX2-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm8[0] 1368; AVX2-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1369; AVX2-NEXT: retq 1370; 1371; AVX512F-LABEL: load_cvt_16i16_to_16f32: 1372; AVX512F: # BB#0: 1373; AVX512F-NEXT: movswl 6(%rdi), %eax 1374; AVX512F-NEXT: vmovd %eax, %xmm0 1375; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm16 1376; AVX512F-NEXT: movswl 4(%rdi), %eax 1377; AVX512F-NEXT: vmovd %eax, %xmm1 1378; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm17 1379; AVX512F-NEXT: movswl (%rdi), %eax 1380; AVX512F-NEXT: vmovd %eax, %xmm2 1381; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm2 1382; AVX512F-NEXT: movswl 2(%rdi), %eax 1383; AVX512F-NEXT: vmovd %eax, %xmm3 1384; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 1385; AVX512F-NEXT: movswl 14(%rdi), %eax 1386; AVX512F-NEXT: vmovd %eax, %xmm4 1387; AVX512F-NEXT: vcvtph2ps %ymm4, %zmm4 1388; AVX512F-NEXT: movswl 12(%rdi), %eax 1389; AVX512F-NEXT: vmovd %eax, %xmm5 1390; AVX512F-NEXT: vcvtph2ps %ymm5, %zmm5 1391; AVX512F-NEXT: movswl 8(%rdi), %eax 1392; AVX512F-NEXT: vmovd %eax, %xmm6 1393; AVX512F-NEXT: vcvtph2ps %ymm6, %zmm6 1394; AVX512F-NEXT: movswl 
10(%rdi), %eax 1395; AVX512F-NEXT: vmovd %eax, %xmm7 1396; AVX512F-NEXT: vcvtph2ps %ymm7, %zmm7 1397; AVX512F-NEXT: movswl 22(%rdi), %eax 1398; AVX512F-NEXT: vmovd %eax, %xmm8 1399; AVX512F-NEXT: vcvtph2ps %ymm8, %zmm8 1400; AVX512F-NEXT: movswl 20(%rdi), %eax 1401; AVX512F-NEXT: vmovd %eax, %xmm9 1402; AVX512F-NEXT: vcvtph2ps %ymm9, %zmm9 1403; AVX512F-NEXT: movswl 16(%rdi), %eax 1404; AVX512F-NEXT: vmovd %eax, %xmm10 1405; AVX512F-NEXT: vcvtph2ps %ymm10, %zmm10 1406; AVX512F-NEXT: movswl 18(%rdi), %eax 1407; AVX512F-NEXT: vmovd %eax, %xmm11 1408; AVX512F-NEXT: vcvtph2ps %ymm11, %zmm11 1409; AVX512F-NEXT: movswl 30(%rdi), %eax 1410; AVX512F-NEXT: vmovd %eax, %xmm12 1411; AVX512F-NEXT: vcvtph2ps %ymm12, %zmm12 1412; AVX512F-NEXT: movswl 28(%rdi), %eax 1413; AVX512F-NEXT: vmovd %eax, %xmm13 1414; AVX512F-NEXT: vcvtph2ps %ymm13, %zmm13 1415; AVX512F-NEXT: movswl 24(%rdi), %eax 1416; AVX512F-NEXT: vmovd %eax, %xmm14 1417; AVX512F-NEXT: vcvtph2ps %ymm14, %zmm14 1418; AVX512F-NEXT: movswl 26(%rdi), %eax 1419; AVX512F-NEXT: vmovd %eax, %xmm15 1420; AVX512F-NEXT: vcvtph2ps %ymm15, %zmm15 1421; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm14[0],xmm15[0],xmm14[2,3] 1422; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1],xmm13[0],xmm0[3] 1423; AVX512F-NEXT: vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],xmm12[0] 1424; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm10[0],xmm11[0],xmm10[2,3] 1425; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm9[0],xmm1[3] 1426; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm8[0] 1427; AVX512F-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1428; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm6[0],xmm7[0],xmm6[2,3] 1429; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm5[0],xmm1[3] 1430; AVX512F-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm4[0] 1431; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 1432; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm17[0],xmm2[3] 1433; AVX512F-NEXT: vinsertps {{.*#+}} xmm2 = 
xmm2[0,1,2],xmm16[0] 1434; AVX512F-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1435; AVX512F-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 1436; AVX512F-NEXT: retq 1437; 1438; AVX512VL-LABEL: load_cvt_16i16_to_16f32: 1439; AVX512VL: # BB#0: 1440; AVX512VL-NEXT: movswl 6(%rdi), %eax 1441; AVX512VL-NEXT: vmovd %eax, %xmm0 1442; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm8 1443; AVX512VL-NEXT: movswl 4(%rdi), %eax 1444; AVX512VL-NEXT: vmovd %eax, %xmm1 1445; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm9 1446; AVX512VL-NEXT: movswl (%rdi), %eax 1447; AVX512VL-NEXT: vmovd %eax, %xmm2 1448; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm10 1449; AVX512VL-NEXT: movswl 2(%rdi), %eax 1450; AVX512VL-NEXT: vmovd %eax, %xmm3 1451; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm11 1452; AVX512VL-NEXT: movswl 14(%rdi), %eax 1453; AVX512VL-NEXT: vmovd %eax, %xmm4 1454; AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm12 1455; AVX512VL-NEXT: movswl 12(%rdi), %eax 1456; AVX512VL-NEXT: vmovd %eax, %xmm5 1457; AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm13 1458; AVX512VL-NEXT: movswl 8(%rdi), %eax 1459; AVX512VL-NEXT: vmovd %eax, %xmm6 1460; AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm14 1461; AVX512VL-NEXT: movswl 10(%rdi), %eax 1462; AVX512VL-NEXT: vmovd %eax, %xmm7 1463; AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm15 1464; AVX512VL-NEXT: movswl 22(%rdi), %eax 1465; AVX512VL-NEXT: vmovd %eax, %xmm0 1466; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 1467; AVX512VL-NEXT: movswl 20(%rdi), %eax 1468; AVX512VL-NEXT: vmovd %eax, %xmm1 1469; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 1470; AVX512VL-NEXT: movswl 16(%rdi), %eax 1471; AVX512VL-NEXT: vmovd %eax, %xmm2 1472; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 1473; AVX512VL-NEXT: movswl 18(%rdi), %eax 1474; AVX512VL-NEXT: vmovd %eax, %xmm3 1475; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 1476; AVX512VL-NEXT: movswl 30(%rdi), %eax 1477; AVX512VL-NEXT: vmovd %eax, %xmm4 1478; AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 1479; AVX512VL-NEXT: movswl 28(%rdi), %eax 1480; AVX512VL-NEXT: vmovd %eax, %xmm5 1481; AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 
; Review note - autogenerated check lines for load_cvt_16i16_to_16f32 (end), cvt_i16_to_f64, and cvt_2i16_to_2f64 (start); regenerate via utils/update_llc_test_checks.py.
1482; AVX512VL-NEXT: movswl 24(%rdi), %eax 1483; AVX512VL-NEXT: vmovd %eax, %xmm6 1484; AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 1485; AVX512VL-NEXT: movswl 26(%rdi), %eax 1486; AVX512VL-NEXT: vmovd %eax, %xmm7 1487; AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 1488; AVX512VL-NEXT: vinsertps {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[2,3] 1489; AVX512VL-NEXT: vinsertps {{.*#+}} xmm5 = xmm6[0,1],xmm5[0],xmm6[3] 1490; AVX512VL-NEXT: vinsertps {{.*#+}} xmm4 = xmm5[0,1,2],xmm4[0] 1491; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[2,3] 1492; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm2[0,1],xmm1[0],xmm2[3] 1493; AVX512VL-NEXT: vinsertps {{.*#+}} xmm0 = xmm1[0,1,2],xmm0[0] 1494; AVX512VL-NEXT: vinsertf128 $1, %xmm4, %ymm0, %ymm0 1495; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm14[0],xmm15[0],xmm14[2,3] 1496; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1],xmm13[0],xmm1[3] 1497; AVX512VL-NEXT: vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],xmm12[0] 1498; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm10[0],xmm11[0],xmm10[2,3] 1499; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1],xmm9[0],xmm2[3] 1500; AVX512VL-NEXT: vinsertps {{.*#+}} xmm2 = xmm2[0,1,2],xmm8[0] 1501; AVX512VL-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 1502; AVX512VL-NEXT: vinsertf64x4 $1, %ymm0, %zmm1, %zmm0 1503; AVX512VL-NEXT: retq 1504 %1 = load <16 x i16>, <16 x i16>* %a0 1505 %2 = bitcast <16 x i16> %1 to <16 x half> 1506 %3 = fpext <16 x half> %2 to <16 x float> 1507 ret <16 x float> %3 1508} 1509 1510; 1511; Half to Double 1512; 1513 1514define double @cvt_i16_to_f64(i16 %a0) nounwind { 1515; AVX1-LABEL: cvt_i16_to_f64: 1516; AVX1: # BB#0: 1517; AVX1-NEXT: movswl %di, %eax 1518; AVX1-NEXT: vmovd %eax, %xmm0 1519; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 1520; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1521; AVX1-NEXT: retq 1522; 1523; AVX2-LABEL: cvt_i16_to_f64: 1524; AVX2: # BB#0: 1525; AVX2-NEXT: movswl %di, %eax 1526; AVX2-NEXT: vmovd %eax, %xmm0 1527; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 1528; AVX2-NEXT: 
vcvtss2sd %xmm0, %xmm0, %xmm0 1529; AVX2-NEXT: retq 1530; 1531; AVX512F-LABEL: cvt_i16_to_f64: 1532; AVX512F: # BB#0: 1533; AVX512F-NEXT: movswl %di, %eax 1534; AVX512F-NEXT: vmovd %eax, %xmm0 1535; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 1536; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1537; AVX512F-NEXT: retq 1538; 1539; AVX512VL-LABEL: cvt_i16_to_f64: 1540; AVX512VL: # BB#0: 1541; AVX512VL-NEXT: movswl %di, %eax 1542; AVX512VL-NEXT: vmovd %eax, %xmm0 1543; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 1544; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1545; AVX512VL-NEXT: retq 1546 %1 = bitcast i16 %a0 to half 1547 %2 = fpext half %1 to double 1548 ret double %2 1549} 1550 1551define <2 x double> @cvt_2i16_to_2f64(<2 x i16> %a0) nounwind { 1552; AVX1-LABEL: cvt_2i16_to_2f64: 1553; AVX1: # BB#0: 1554; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1555; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1556; AVX1-NEXT: vmovd %xmm0, %eax 1557; AVX1-NEXT: movswl %ax, %ecx 1558; AVX1-NEXT: shrl $16, %eax 1559; AVX1-NEXT: cwtl 1560; AVX1-NEXT: vmovd %eax, %xmm0 1561; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 1562; AVX1-NEXT: vmovd %ecx, %xmm1 1563; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 1564; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1565; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1566; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1567; AVX1-NEXT: retq 1568; 1569; AVX2-LABEL: cvt_2i16_to_2f64: 1570; AVX2: # BB#0: 1571; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1572; AVX2-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1573; AVX2-NEXT: vmovd %xmm0, %eax 1574; AVX2-NEXT: movswl %ax, %ecx 1575; AVX2-NEXT: shrl $16, %eax 1576; AVX2-NEXT: cwtl 1577; AVX2-NEXT: vmovd %eax, %xmm0 1578; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 1579; AVX2-NEXT: vmovd %ecx, %xmm1 1580; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 1581; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1582; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1583; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1584; AVX2-NEXT: retq 
; Review note - autogenerated check lines for cvt_2i16_to_2f64 (end), cvt_4i16_to_4f64, and cvt_8i16_to_2f64 (start); regenerate via utils/update_llc_test_checks.py.
1585; 1586; AVX512F-LABEL: cvt_2i16_to_2f64: 1587; AVX512F: # BB#0: 1588; AVX512F-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 1589; AVX512F-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 1590; AVX512F-NEXT: vmovd %xmm0, %eax 1591; AVX512F-NEXT: movswl %ax, %ecx 1592; AVX512F-NEXT: shrl $16, %eax 1593; AVX512F-NEXT: cwtl 1594; AVX512F-NEXT: vmovd %eax, %xmm0 1595; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 1596; AVX512F-NEXT: vmovd %ecx, %xmm1 1597; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 1598; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1599; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1600; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1601; AVX512F-NEXT: retq 1602; 1603; AVX512VL-LABEL: cvt_2i16_to_2f64: 1604; AVX512VL: # BB#0: 1605; AVX512VL-NEXT: vpmovqw %xmm0, -{{[0-9]+}}(%rsp) 1606; AVX512VL-NEXT: movl -{{[0-9]+}}(%rsp), %eax 1607; AVX512VL-NEXT: movswl %ax, %ecx 1608; AVX512VL-NEXT: shrl $16, %eax 1609; AVX512VL-NEXT: cwtl 1610; AVX512VL-NEXT: vmovd %eax, %xmm0 1611; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 1612; AVX512VL-NEXT: vmovd %ecx, %xmm1 1613; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 1614; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1615; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1616; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1617; AVX512VL-NEXT: retq 1618 %1 = bitcast <2 x i16> %a0 to <2 x half> 1619 %2 = fpext <2 x half> %1 to <2 x double> 1620 ret <2 x double> %2 1621} 1622 1623define <4 x double> @cvt_4i16_to_4f64(<4 x i16> %a0) nounwind { 1624; AVX1-LABEL: cvt_4i16_to_4f64: 1625; AVX1: # BB#0: 1626; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 1627; AVX1-NEXT: vmovq %xmm0, %rax 1628; AVX1-NEXT: movq %rax, %rcx 1629; AVX1-NEXT: movl %eax, %edx 1630; AVX1-NEXT: movswl %ax, %esi 1631; AVX1-NEXT: shrq $48, %rax 1632; AVX1-NEXT: shrq $32, %rcx 1633; AVX1-NEXT: shrl $16, %edx 1634; AVX1-NEXT: movswl %dx, %edx 1635; AVX1-NEXT: vmovd %edx, %xmm0 1636; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 1637; 
AVX1-NEXT: vmovd %esi, %xmm1 1638; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 1639; AVX1-NEXT: movswl %cx, %ecx 1640; AVX1-NEXT: vmovd %ecx, %xmm2 1641; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2 1642; AVX1-NEXT: cwtl 1643; AVX1-NEXT: vmovd %eax, %xmm3 1644; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 1645; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 1646; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 1647; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1648; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1649; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1650; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1651; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1652; AVX1-NEXT: retq 1653; 1654; AVX2-LABEL: cvt_4i16_to_4f64: 1655; AVX2: # BB#0: 1656; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 1657; AVX2-NEXT: vmovq %xmm0, %rax 1658; AVX2-NEXT: movq %rax, %rcx 1659; AVX2-NEXT: movl %eax, %edx 1660; AVX2-NEXT: movswl %ax, %esi 1661; AVX2-NEXT: shrq $48, %rax 1662; AVX2-NEXT: shrq $32, %rcx 1663; AVX2-NEXT: shrl $16, %edx 1664; AVX2-NEXT: movswl %dx, %edx 1665; AVX2-NEXT: vmovd %edx, %xmm0 1666; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 1667; AVX2-NEXT: vmovd %esi, %xmm1 1668; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 1669; AVX2-NEXT: movswl %cx, %ecx 1670; AVX2-NEXT: vmovd %ecx, %xmm2 1671; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 1672; AVX2-NEXT: cwtl 1673; AVX2-NEXT: vmovd %eax, %xmm3 1674; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 1675; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 1676; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 1677; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1678; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1679; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1680; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1681; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1682; AVX2-NEXT: retq 1683; 1684; AVX512F-LABEL: cvt_4i16_to_4f64: 1685; AVX512F: # BB#0: 1686; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 1687; AVX512F-NEXT: vmovq %xmm0, 
%rax 1688; AVX512F-NEXT: movq %rax, %rcx 1689; AVX512F-NEXT: movl %eax, %edx 1690; AVX512F-NEXT: movswl %ax, %esi 1691; AVX512F-NEXT: shrq $48, %rax 1692; AVX512F-NEXT: shrq $32, %rcx 1693; AVX512F-NEXT: shrl $16, %edx 1694; AVX512F-NEXT: movswl %dx, %edx 1695; AVX512F-NEXT: vmovd %edx, %xmm0 1696; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 1697; AVX512F-NEXT: vmovd %esi, %xmm1 1698; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 1699; AVX512F-NEXT: movswl %cx, %ecx 1700; AVX512F-NEXT: vmovd %ecx, %xmm2 1701; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm2 1702; AVX512F-NEXT: cwtl 1703; AVX512F-NEXT: vmovd %eax, %xmm3 1704; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 1705; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 1706; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 1707; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1708; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1709; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1710; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1711; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1712; AVX512F-NEXT: retq 1713; 1714; AVX512VL-LABEL: cvt_4i16_to_4f64: 1715; AVX512VL: # BB#0: 1716; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp) 1717; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1718; AVX512VL-NEXT: movq %rax, %rcx 1719; AVX512VL-NEXT: movl %eax, %edx 1720; AVX512VL-NEXT: movswl %ax, %esi 1721; AVX512VL-NEXT: shrq $48, %rax 1722; AVX512VL-NEXT: shrq $32, %rcx 1723; AVX512VL-NEXT: shrl $16, %edx 1724; AVX512VL-NEXT: movswl %dx, %edx 1725; AVX512VL-NEXT: vmovd %edx, %xmm0 1726; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 1727; AVX512VL-NEXT: vmovd %esi, %xmm1 1728; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 1729; AVX512VL-NEXT: movswl %cx, %ecx 1730; AVX512VL-NEXT: vmovd %ecx, %xmm2 1731; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 1732; AVX512VL-NEXT: cwtl 1733; AVX512VL-NEXT: vmovd %eax, %xmm3 1734; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 1735; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 1736; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 1737; AVX512VL-NEXT: 
vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1738; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1739; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1740; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1741; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1742; AVX512VL-NEXT: retq 1743 %1 = bitcast <4 x i16> %a0 to <4 x half> 1744 %2 = fpext <4 x half> %1 to <4 x double> 1745 ret <4 x double> %2 1746} 1747 1748define <2 x double> @cvt_8i16_to_2f64(<8 x i16> %a0) nounwind { 1749; AVX1-LABEL: cvt_8i16_to_2f64: 1750; AVX1: # BB#0: 1751; AVX1-NEXT: vmovd %xmm0, %eax 1752; AVX1-NEXT: movswl %ax, %ecx 1753; AVX1-NEXT: shrl $16, %eax 1754; AVX1-NEXT: cwtl 1755; AVX1-NEXT: vmovd %eax, %xmm0 1756; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 1757; AVX1-NEXT: vmovd %ecx, %xmm1 1758; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 1759; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1760; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1761; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1762; AVX1-NEXT: retq 1763; 1764; AVX2-LABEL: cvt_8i16_to_2f64: 1765; AVX2: # BB#0: 1766; AVX2-NEXT: vmovd %xmm0, %eax 1767; AVX2-NEXT: movswl %ax, %ecx 1768; AVX2-NEXT: shrl $16, %eax 1769; AVX2-NEXT: cwtl 1770; AVX2-NEXT: vmovd %eax, %xmm0 1771; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 1772; AVX2-NEXT: vmovd %ecx, %xmm1 1773; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 1774; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1775; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1776; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1777; AVX2-NEXT: retq 1778; 1779; AVX512F-LABEL: cvt_8i16_to_2f64: 1780; AVX512F: # BB#0: 1781; AVX512F-NEXT: vmovd %xmm0, %eax 1782; AVX512F-NEXT: movswl %ax, %ecx 1783; AVX512F-NEXT: shrl $16, %eax 1784; AVX512F-NEXT: cwtl 1785; AVX512F-NEXT: vmovd %eax, %xmm0 1786; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 1787; AVX512F-NEXT: vmovd %ecx, %xmm1 1788; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 1789; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1790; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1791; AVX512F-NEXT: vunpcklpd
{{.*#+}} xmm0 = xmm1[0],xmm0[0] 1792; AVX512F-NEXT: retq 1793; 1794; AVX512VL-LABEL: cvt_8i16_to_2f64: 1795; AVX512VL: # BB#0: 1796; AVX512VL-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1797; AVX512VL-NEXT: vpmovqw %xmm0, -{{[0-9]+}}(%rsp) 1798; AVX512VL-NEXT: movl -{{[0-9]+}}(%rsp), %eax 1799; AVX512VL-NEXT: movswl %ax, %ecx 1800; AVX512VL-NEXT: shrl $16, %eax 1801; AVX512VL-NEXT: cwtl 1802; AVX512VL-NEXT: vmovd %eax, %xmm0 1803; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 1804; AVX512VL-NEXT: vmovd %ecx, %xmm1 1805; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 1806; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1807; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1808; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1809; AVX512VL-NEXT: retq 1810 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 1811 %2 = bitcast <2 x i16> %1 to <2 x half> 1812 %3 = fpext <2 x half> %2 to <2 x double> 1813 ret <2 x double> %3 1814} 1815 1816define <4 x double> @cvt_8i16_to_4f64(<8 x i16> %a0) nounwind { 1817; AVX1-LABEL: cvt_8i16_to_4f64: 1818; AVX1: # BB#0: 1819; AVX1-NEXT: vmovq %xmm0, %rax 1820; AVX1-NEXT: movq %rax, %rcx 1821; AVX1-NEXT: movl %eax, %edx 1822; AVX1-NEXT: movswl %ax, %esi 1823; AVX1-NEXT: shrq $48, %rax 1824; AVX1-NEXT: shrq $32, %rcx 1825; AVX1-NEXT: shrl $16, %edx 1826; AVX1-NEXT: movswl %dx, %edx 1827; AVX1-NEXT: vmovd %edx, %xmm0 1828; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 1829; AVX1-NEXT: vmovd %esi, %xmm1 1830; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 1831; AVX1-NEXT: movswl %cx, %ecx 1832; AVX1-NEXT: vmovd %ecx, %xmm2 1833; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2 1834; AVX1-NEXT: cwtl 1835; AVX1-NEXT: vmovd %eax, %xmm3 1836; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 1837; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 1838; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 1839; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1840; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1841; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1842; AVX1-NEXT: 
vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1843; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1844; AVX1-NEXT: retq 1845; 1846; AVX2-LABEL: cvt_8i16_to_4f64: 1847; AVX2: # BB#0: 1848; AVX2-NEXT: vmovq %xmm0, %rax 1849; AVX2-NEXT: movq %rax, %rcx 1850; AVX2-NEXT: movl %eax, %edx 1851; AVX2-NEXT: movswl %ax, %esi 1852; AVX2-NEXT: shrq $48, %rax 1853; AVX2-NEXT: shrq $32, %rcx 1854; AVX2-NEXT: shrl $16, %edx 1855; AVX2-NEXT: movswl %dx, %edx 1856; AVX2-NEXT: vmovd %edx, %xmm0 1857; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 1858; AVX2-NEXT: vmovd %esi, %xmm1 1859; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 1860; AVX2-NEXT: movswl %cx, %ecx 1861; AVX2-NEXT: vmovd %ecx, %xmm2 1862; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 1863; AVX2-NEXT: cwtl 1864; AVX2-NEXT: vmovd %eax, %xmm3 1865; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 1866; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 1867; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 1868; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1869; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1870; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1871; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1872; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1873; AVX2-NEXT: retq 1874; 1875; AVX512F-LABEL: cvt_8i16_to_4f64: 1876; AVX512F: # BB#0: 1877; AVX512F-NEXT: vmovq %xmm0, %rax 1878; AVX512F-NEXT: movq %rax, %rcx 1879; AVX512F-NEXT: movl %eax, %edx 1880; AVX512F-NEXT: movswl %ax, %esi 1881; AVX512F-NEXT: shrq $48, %rax 1882; AVX512F-NEXT: shrq $32, %rcx 1883; AVX512F-NEXT: shrl $16, %edx 1884; AVX512F-NEXT: movswl %dx, %edx 1885; AVX512F-NEXT: vmovd %edx, %xmm0 1886; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 1887; AVX512F-NEXT: vmovd %esi, %xmm1 1888; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 1889; AVX512F-NEXT: movswl %cx, %ecx 1890; AVX512F-NEXT: vmovd %ecx, %xmm2 1891; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm2 1892; AVX512F-NEXT: cwtl 1893; AVX512F-NEXT: vmovd %eax, %xmm3 1894; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 1895; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 1896; AVX512F-NEXT: 
vcvtss2sd %xmm2, %xmm2, %xmm2 1897; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1898; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1899; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1900; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1901; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1902; AVX512F-NEXT: retq 1903; 1904; AVX512VL-LABEL: cvt_8i16_to_4f64: 1905; AVX512VL: # BB#0: 1906; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1907; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp) 1908; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax 1909; AVX512VL-NEXT: movq %rax, %rcx 1910; AVX512VL-NEXT: movl %eax, %edx 1911; AVX512VL-NEXT: movswl %ax, %esi 1912; AVX512VL-NEXT: shrq $48, %rax 1913; AVX512VL-NEXT: shrq $32, %rcx 1914; AVX512VL-NEXT: shrl $16, %edx 1915; AVX512VL-NEXT: movswl %dx, %edx 1916; AVX512VL-NEXT: vmovd %edx, %xmm0 1917; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 1918; AVX512VL-NEXT: vmovd %esi, %xmm1 1919; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 1920; AVX512VL-NEXT: movswl %cx, %ecx 1921; AVX512VL-NEXT: vmovd %ecx, %xmm2 1922; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 1923; AVX512VL-NEXT: cwtl 1924; AVX512VL-NEXT: vmovd %eax, %xmm3 1925; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 1926; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 1927; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 1928; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 1929; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1930; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1931; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1932; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 1933; AVX512VL-NEXT: retq 1934 %1 = shufflevector <8 x i16> %a0, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 1935 %2 = bitcast <4 x i16> %1 to <4 x half> 1936 %3 = fpext <4 x half> %2 to <4 x double> 1937 ret <4 x double> %3 1938} 1939 1940define <8 x double> @cvt_8i16_to_8f64(<8 x i16> %a0) nounwind { 1941; AVX1-LABEL: 
cvt_8i16_to_8f64: 1942; AVX1: # BB#0: 1943; AVX1-NEXT: vmovq %xmm0, %rdx 1944; AVX1-NEXT: movq %rdx, %r9 1945; AVX1-NEXT: movl %edx, %r10d 1946; AVX1-NEXT: movswl %dx, %r8d 1947; AVX1-NEXT: shrq $48, %rdx 1948; AVX1-NEXT: shrq $32, %r9 1949; AVX1-NEXT: shrl $16, %r10d 1950; AVX1-NEXT: vpextrq $1, %xmm0, %rdi 1951; AVX1-NEXT: movq %rdi, %rsi 1952; AVX1-NEXT: movl %edi, %eax 1953; AVX1-NEXT: movswl %di, %ecx 1954; AVX1-NEXT: shrq $48, %rdi 1955; AVX1-NEXT: shrq $32, %rsi 1956; AVX1-NEXT: shrl $16, %eax 1957; AVX1-NEXT: cwtl 1958; AVX1-NEXT: vmovd %eax, %xmm0 1959; AVX1-NEXT: vcvtph2ps %xmm0, %xmm1 1960; AVX1-NEXT: vmovd %ecx, %xmm0 1961; AVX1-NEXT: vcvtph2ps %xmm0, %xmm2 1962; AVX1-NEXT: movswl %si, %eax 1963; AVX1-NEXT: vmovd %eax, %xmm0 1964; AVX1-NEXT: vcvtph2ps %xmm0, %xmm3 1965; AVX1-NEXT: movswl %di, %eax 1966; AVX1-NEXT: vmovd %eax, %xmm0 1967; AVX1-NEXT: vcvtph2ps %xmm0, %xmm4 1968; AVX1-NEXT: movswl %r10w, %eax 1969; AVX1-NEXT: vmovd %eax, %xmm0 1970; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 1971; AVX1-NEXT: vmovd %r8d, %xmm5 1972; AVX1-NEXT: vcvtph2ps %xmm5, %xmm5 1973; AVX1-NEXT: movswl %r9w, %eax 1974; AVX1-NEXT: vmovd %eax, %xmm6 1975; AVX1-NEXT: vcvtph2ps %xmm6, %xmm6 1976; AVX1-NEXT: movswl %dx, %eax 1977; AVX1-NEXT: vmovd %eax, %xmm7 1978; AVX1-NEXT: vcvtph2ps %xmm7, %xmm7 1979; AVX1-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 1980; AVX1-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 1981; AVX1-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] 1982; AVX1-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 1983; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 1984; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm5[0],xmm0[0] 1985; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 1986; AVX1-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 1987; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 1988; AVX1-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] 1989; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 1990; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 1991; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm2[0],xmm1[0] 1992; AVX1-NEXT: vinsertf128 $1, 
%xmm3, %ymm1, %ymm1 1993; AVX1-NEXT: retq 1994; 1995; AVX2-LABEL: cvt_8i16_to_8f64: 1996; AVX2: # BB#0: 1997; AVX2-NEXT: vmovq %xmm0, %rdx 1998; AVX2-NEXT: movq %rdx, %r9 1999; AVX2-NEXT: movl %edx, %r10d 2000; AVX2-NEXT: movswl %dx, %r8d 2001; AVX2-NEXT: shrq $48, %rdx 2002; AVX2-NEXT: shrq $32, %r9 2003; AVX2-NEXT: shrl $16, %r10d 2004; AVX2-NEXT: vpextrq $1, %xmm0, %rdi 2005; AVX2-NEXT: movq %rdi, %rsi 2006; AVX2-NEXT: movl %edi, %eax 2007; AVX2-NEXT: movswl %di, %ecx 2008; AVX2-NEXT: shrq $48, %rdi 2009; AVX2-NEXT: shrq $32, %rsi 2010; AVX2-NEXT: shrl $16, %eax 2011; AVX2-NEXT: cwtl 2012; AVX2-NEXT: vmovd %eax, %xmm0 2013; AVX2-NEXT: vcvtph2ps %xmm0, %xmm1 2014; AVX2-NEXT: vmovd %ecx, %xmm0 2015; AVX2-NEXT: vcvtph2ps %xmm0, %xmm2 2016; AVX2-NEXT: movswl %si, %eax 2017; AVX2-NEXT: vmovd %eax, %xmm0 2018; AVX2-NEXT: vcvtph2ps %xmm0, %xmm3 2019; AVX2-NEXT: movswl %di, %eax 2020; AVX2-NEXT: vmovd %eax, %xmm0 2021; AVX2-NEXT: vcvtph2ps %xmm0, %xmm4 2022; AVX2-NEXT: movswl %r10w, %eax 2023; AVX2-NEXT: vmovd %eax, %xmm0 2024; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 2025; AVX2-NEXT: vmovd %r8d, %xmm5 2026; AVX2-NEXT: vcvtph2ps %xmm5, %xmm5 2027; AVX2-NEXT: movswl %r9w, %eax 2028; AVX2-NEXT: vmovd %eax, %xmm6 2029; AVX2-NEXT: vcvtph2ps %xmm6, %xmm6 2030; AVX2-NEXT: movswl %dx, %eax 2031; AVX2-NEXT: vmovd %eax, %xmm7 2032; AVX2-NEXT: vcvtph2ps %xmm7, %xmm7 2033; AVX2-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 2034; AVX2-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 2035; AVX2-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] 2036; AVX2-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 2037; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2038; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm5[0],xmm0[0] 2039; AVX2-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 2040; AVX2-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 2041; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2042; AVX2-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] 2043; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2044; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2045; AVX2-NEXT: vunpcklpd 
{{.*#+}} xmm1 = xmm2[0],xmm1[0] 2046; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2047; AVX2-NEXT: retq 2048; 2049; AVX512F-LABEL: cvt_8i16_to_8f64: 2050; AVX512F: # BB#0: 2051; AVX512F-NEXT: vpextrq $1, %xmm0, %rdx 2052; AVX512F-NEXT: movq %rdx, %r8 2053; AVX512F-NEXT: movl %edx, %r9d 2054; AVX512F-NEXT: movswl %dx, %r10d 2055; AVX512F-NEXT: shrq $48, %rdx 2056; AVX512F-NEXT: shrq $32, %r8 2057; AVX512F-NEXT: shrl $16, %r9d 2058; AVX512F-NEXT: vmovq %xmm0, %rdi 2059; AVX512F-NEXT: movq %rdi, %rax 2060; AVX512F-NEXT: movl %edi, %ecx 2061; AVX512F-NEXT: movswl %di, %esi 2062; AVX512F-NEXT: shrq $48, %rdi 2063; AVX512F-NEXT: shrq $32, %rax 2064; AVX512F-NEXT: shrl $16, %ecx 2065; AVX512F-NEXT: movswl %cx, %ecx 2066; AVX512F-NEXT: vmovd %ecx, %xmm0 2067; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 2068; AVX512F-NEXT: vmovd %esi, %xmm1 2069; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 2070; AVX512F-NEXT: cwtl 2071; AVX512F-NEXT: vmovd %eax, %xmm2 2072; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm2 2073; AVX512F-NEXT: movswl %di, %eax 2074; AVX512F-NEXT: vmovd %eax, %xmm3 2075; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 2076; AVX512F-NEXT: movswl %r9w, %eax 2077; AVX512F-NEXT: vmovd %eax, %xmm4 2078; AVX512F-NEXT: vcvtph2ps %ymm4, %zmm4 2079; AVX512F-NEXT: vmovd %r10d, %xmm5 2080; AVX512F-NEXT: vcvtph2ps %ymm5, %zmm5 2081; AVX512F-NEXT: movswl %r8w, %eax 2082; AVX512F-NEXT: vmovd %eax, %xmm6 2083; AVX512F-NEXT: vcvtph2ps %ymm6, %zmm6 2084; AVX512F-NEXT: movswl %dx, %eax 2085; AVX512F-NEXT: vmovd %eax, %xmm7 2086; AVX512F-NEXT: vcvtph2ps %ymm7, %zmm7 2087; AVX512F-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 2088; AVX512F-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 2089; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] 2090; AVX512F-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 2091; AVX512F-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 2092; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm5[0],xmm4[0] 2093; AVX512F-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4 2094; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2095; AVX512F-NEXT: 
vcvtss2sd %xmm2, %xmm2, %xmm2 2096; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2097; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2098; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2099; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2100; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2101; AVX512F-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0 2102; AVX512F-NEXT: retq 2103; 2104; AVX512VL-LABEL: cvt_8i16_to_8f64: 2105; AVX512VL: # BB#0: 2106; AVX512VL-NEXT: vpextrq $1, %xmm0, %rdx 2107; AVX512VL-NEXT: movq %rdx, %r8 2108; AVX512VL-NEXT: movl %edx, %r10d 2109; AVX512VL-NEXT: movswl %dx, %r9d 2110; AVX512VL-NEXT: shrq $48, %rdx 2111; AVX512VL-NEXT: shrq $32, %r8 2112; AVX512VL-NEXT: shrl $16, %r10d 2113; AVX512VL-NEXT: vmovq %xmm0, %rdi 2114; AVX512VL-NEXT: movq %rdi, %rax 2115; AVX512VL-NEXT: movl %edi, %esi 2116; AVX512VL-NEXT: movswl %di, %ecx 2117; AVX512VL-NEXT: shrq $48, %rdi 2118; AVX512VL-NEXT: shrq $32, %rax 2119; AVX512VL-NEXT: shrl $16, %esi 2120; AVX512VL-NEXT: movswl %si, %esi 2121; AVX512VL-NEXT: vmovd %esi, %xmm0 2122; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 2123; AVX512VL-NEXT: vmovd %ecx, %xmm1 2124; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 2125; AVX512VL-NEXT: cwtl 2126; AVX512VL-NEXT: vmovd %eax, %xmm2 2127; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 2128; AVX512VL-NEXT: movswl %di, %eax 2129; AVX512VL-NEXT: vmovd %eax, %xmm3 2130; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 2131; AVX512VL-NEXT: movswl %r10w, %eax 2132; AVX512VL-NEXT: vmovd %eax, %xmm4 2133; AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 2134; AVX512VL-NEXT: vmovd %r9d, %xmm5 2135; AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 2136; AVX512VL-NEXT: movswl %r8w, %eax 2137; AVX512VL-NEXT: vmovd %eax, %xmm6 2138; AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 2139; AVX512VL-NEXT: movswl %dx, %eax 2140; AVX512VL-NEXT: vmovd %eax, %xmm7 2141; AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 2142; AVX512VL-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 2143; AVX512VL-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 2144; AVX512VL-NEXT: 
vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] 2145; AVX512VL-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 2146; AVX512VL-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 2147; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm5[0],xmm4[0] 2148; AVX512VL-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4 2149; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2150; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2151; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2152; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2153; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2154; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2155; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2156; AVX512VL-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0 2157; AVX512VL-NEXT: retq 2158 %1 = bitcast <8 x i16> %a0 to <8 x half> 2159 %2 = fpext <8 x half> %1 to <8 x double> 2160 ret <8 x double> %2 2161} 2162 2163; 2164; Half to Double (Load) 2165; 2166 2167define double @load_cvt_i16_to_f64(i16* %a0) nounwind { 2168; AVX1-LABEL: load_cvt_i16_to_f64: 2169; AVX1: # BB#0: 2170; AVX1-NEXT: movswl (%rdi), %eax 2171; AVX1-NEXT: vmovd %eax, %xmm0 2172; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 2173; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2174; AVX1-NEXT: retq 2175; 2176; AVX2-LABEL: load_cvt_i16_to_f64: 2177; AVX2: # BB#0: 2178; AVX2-NEXT: movswl (%rdi), %eax 2179; AVX2-NEXT: vmovd %eax, %xmm0 2180; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 2181; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2182; AVX2-NEXT: retq 2183; 2184; AVX512F-LABEL: load_cvt_i16_to_f64: 2185; AVX512F: # BB#0: 2186; AVX512F-NEXT: movswl (%rdi), %eax 2187; AVX512F-NEXT: vmovd %eax, %xmm0 2188; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 2189; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2190; AVX512F-NEXT: retq 2191; 2192; AVX512VL-LABEL: load_cvt_i16_to_f64: 2193; AVX512VL: # BB#0: 2194; AVX512VL-NEXT: movswl (%rdi), %eax 2195; AVX512VL-NEXT: vmovd %eax, %xmm0 2196; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 2197; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2198; AVX512VL-NEXT: retq 2199 
%1 = load i16, i16* %a0 2200 %2 = bitcast i16 %1 to half 2201 %3 = fpext half %2 to double 2202 ret double %3 2203} 2204 2205define <2 x double> @load_cvt_2i16_to_2f64(<2 x i16>* %a0) nounwind { 2206; AVX1-LABEL: load_cvt_2i16_to_2f64: 2207; AVX1: # BB#0: 2208; AVX1-NEXT: movswl (%rdi), %eax 2209; AVX1-NEXT: vmovd %eax, %xmm0 2210; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 2211; AVX1-NEXT: movswl 2(%rdi), %eax 2212; AVX1-NEXT: vmovd %eax, %xmm1 2213; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 2214; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2215; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2216; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2217; AVX1-NEXT: retq 2218; 2219; AVX2-LABEL: load_cvt_2i16_to_2f64: 2220; AVX2: # BB#0: 2221; AVX2-NEXT: movswl (%rdi), %eax 2222; AVX2-NEXT: vmovd %eax, %xmm0 2223; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 2224; AVX2-NEXT: movswl 2(%rdi), %eax 2225; AVX2-NEXT: vmovd %eax, %xmm1 2226; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 2227; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2228; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2229; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2230; AVX2-NEXT: retq 2231; 2232; AVX512F-LABEL: load_cvt_2i16_to_2f64: 2233; AVX512F: # BB#0: 2234; AVX512F-NEXT: movswl (%rdi), %eax 2235; AVX512F-NEXT: vmovd %eax, %xmm0 2236; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 2237; AVX512F-NEXT: movswl 2(%rdi), %eax 2238; AVX512F-NEXT: vmovd %eax, %xmm1 2239; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 2240; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2241; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2242; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2243; AVX512F-NEXT: retq 2244; 2245; AVX512VL-LABEL: load_cvt_2i16_to_2f64: 2246; AVX512VL: # BB#0: 2247; AVX512VL-NEXT: movswl (%rdi), %eax 2248; AVX512VL-NEXT: vmovd %eax, %xmm0 2249; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 2250; AVX512VL-NEXT: movswl 2(%rdi), %eax 2251; AVX512VL-NEXT: vmovd %eax, %xmm1 2252; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 2253; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, 
%xmm1 2254; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2255; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2256; AVX512VL-NEXT: retq 2257 %1 = load <2 x i16>, <2 x i16>* %a0 2258 %2 = bitcast <2 x i16> %1 to <2 x half> 2259 %3 = fpext <2 x half> %2 to <2 x double> 2260 ret <2 x double> %3 2261} 2262 2263define <4 x double> @load_cvt_4i16_to_4f64(<4 x i16>* %a0) nounwind { 2264; AVX1-LABEL: load_cvt_4i16_to_4f64: 2265; AVX1: # BB#0: 2266; AVX1-NEXT: movswl (%rdi), %eax 2267; AVX1-NEXT: vmovd %eax, %xmm0 2268; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 2269; AVX1-NEXT: movswl 2(%rdi), %eax 2270; AVX1-NEXT: vmovd %eax, %xmm1 2271; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 2272; AVX1-NEXT: movswl 4(%rdi), %eax 2273; AVX1-NEXT: vmovd %eax, %xmm2 2274; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2 2275; AVX1-NEXT: movswl 6(%rdi), %eax 2276; AVX1-NEXT: vmovd %eax, %xmm3 2277; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 2278; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2279; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2280; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2281; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2282; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2283; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2284; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2285; AVX1-NEXT: retq 2286; 2287; AVX2-LABEL: load_cvt_4i16_to_4f64: 2288; AVX2: # BB#0: 2289; AVX2-NEXT: movswl (%rdi), %eax 2290; AVX2-NEXT: vmovd %eax, %xmm0 2291; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 2292; AVX2-NEXT: movswl 2(%rdi), %eax 2293; AVX2-NEXT: vmovd %eax, %xmm1 2294; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 2295; AVX2-NEXT: movswl 4(%rdi), %eax 2296; AVX2-NEXT: vmovd %eax, %xmm2 2297; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 2298; AVX2-NEXT: movswl 6(%rdi), %eax 2299; AVX2-NEXT: vmovd %eax, %xmm3 2300; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 2301; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2302; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2303; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2304; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 
2305; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2306; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2307; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2308; AVX2-NEXT: retq 2309; 2310; AVX512F-LABEL: load_cvt_4i16_to_4f64: 2311; AVX512F: # BB#0: 2312; AVX512F-NEXT: movswl (%rdi), %eax 2313; AVX512F-NEXT: vmovd %eax, %xmm0 2314; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 2315; AVX512F-NEXT: movswl 2(%rdi), %eax 2316; AVX512F-NEXT: vmovd %eax, %xmm1 2317; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 2318; AVX512F-NEXT: movswl 4(%rdi), %eax 2319; AVX512F-NEXT: vmovd %eax, %xmm2 2320; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm2 2321; AVX512F-NEXT: movswl 6(%rdi), %eax 2322; AVX512F-NEXT: vmovd %eax, %xmm3 2323; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 2324; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2325; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2326; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2327; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2328; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2329; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2330; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2331; AVX512F-NEXT: retq 2332; 2333; AVX512VL-LABEL: load_cvt_4i16_to_4f64: 2334; AVX512VL: # BB#0: 2335; AVX512VL-NEXT: movswl (%rdi), %eax 2336; AVX512VL-NEXT: vmovd %eax, %xmm0 2337; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 2338; AVX512VL-NEXT: movswl 2(%rdi), %eax 2339; AVX512VL-NEXT: vmovd %eax, %xmm1 2340; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 2341; AVX512VL-NEXT: movswl 4(%rdi), %eax 2342; AVX512VL-NEXT: vmovd %eax, %xmm2 2343; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 2344; AVX512VL-NEXT: movswl 6(%rdi), %eax 2345; AVX512VL-NEXT: vmovd %eax, %xmm3 2346; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 2347; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2348; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2349; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2350; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2351; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2352; 
AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2353; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2354; AVX512VL-NEXT: retq 2355 %1 = load <4 x i16>, <4 x i16>* %a0 2356 %2 = bitcast <4 x i16> %1 to <4 x half> 2357 %3 = fpext <4 x half> %2 to <4 x double> 2358 ret <4 x double> %3 2359} 2360 2361define <4 x double> @load_cvt_8i16_to_4f64(<8 x i16>* %a0) nounwind { 2362; AVX1-LABEL: load_cvt_8i16_to_4f64: 2363; AVX1: # BB#0: 2364; AVX1-NEXT: movq (%rdi), %rax 2365; AVX1-NEXT: movq %rax, %rcx 2366; AVX1-NEXT: movl %eax, %edx 2367; AVX1-NEXT: movswl %ax, %esi 2368; AVX1-NEXT: shrq $48, %rax 2369; AVX1-NEXT: shrq $32, %rcx 2370; AVX1-NEXT: shrl $16, %edx 2371; AVX1-NEXT: movswl %dx, %edx 2372; AVX1-NEXT: vmovd %edx, %xmm0 2373; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 2374; AVX1-NEXT: vmovd %esi, %xmm1 2375; AVX1-NEXT: vcvtph2ps %xmm1, %xmm1 2376; AVX1-NEXT: movswl %cx, %ecx 2377; AVX1-NEXT: vmovd %ecx, %xmm2 2378; AVX1-NEXT: vcvtph2ps %xmm2, %xmm2 2379; AVX1-NEXT: cwtl 2380; AVX1-NEXT: vmovd %eax, %xmm3 2381; AVX1-NEXT: vcvtph2ps %xmm3, %xmm3 2382; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2383; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2384; AVX1-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2385; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2386; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2387; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2388; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2389; AVX1-NEXT: retq 2390; 2391; AVX2-LABEL: load_cvt_8i16_to_4f64: 2392; AVX2: # BB#0: 2393; AVX2-NEXT: movq (%rdi), %rax 2394; AVX2-NEXT: movq %rax, %rcx 2395; AVX2-NEXT: movl %eax, %edx 2396; AVX2-NEXT: movswl %ax, %esi 2397; AVX2-NEXT: shrq $48, %rax 2398; AVX2-NEXT: shrq $32, %rcx 2399; AVX2-NEXT: shrl $16, %edx 2400; AVX2-NEXT: movswl %dx, %edx 2401; AVX2-NEXT: vmovd %edx, %xmm0 2402; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 2403; AVX2-NEXT: vmovd %esi, %xmm1 2404; AVX2-NEXT: vcvtph2ps %xmm1, %xmm1 2405; AVX2-NEXT: movswl %cx, %ecx 2406; AVX2-NEXT: vmovd %ecx, %xmm2 
2407; AVX2-NEXT: vcvtph2ps %xmm2, %xmm2 2408; AVX2-NEXT: cwtl 2409; AVX2-NEXT: vmovd %eax, %xmm3 2410; AVX2-NEXT: vcvtph2ps %xmm3, %xmm3 2411; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2412; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2413; AVX2-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2414; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2415; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2416; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2417; AVX2-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2418; AVX2-NEXT: retq 2419; 2420; AVX512F-LABEL: load_cvt_8i16_to_4f64: 2421; AVX512F: # BB#0: 2422; AVX512F-NEXT: movq (%rdi), %rax 2423; AVX512F-NEXT: movq %rax, %rcx 2424; AVX512F-NEXT: movl %eax, %edx 2425; AVX512F-NEXT: movswl %ax, %esi 2426; AVX512F-NEXT: shrq $48, %rax 2427; AVX512F-NEXT: shrq $32, %rcx 2428; AVX512F-NEXT: shrl $16, %edx 2429; AVX512F-NEXT: movswl %dx, %edx 2430; AVX512F-NEXT: vmovd %edx, %xmm0 2431; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 2432; AVX512F-NEXT: vmovd %esi, %xmm1 2433; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 2434; AVX512F-NEXT: movswl %cx, %ecx 2435; AVX512F-NEXT: vmovd %ecx, %xmm2 2436; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm2 2437; AVX512F-NEXT: cwtl 2438; AVX512F-NEXT: vmovd %eax, %xmm3 2439; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 2440; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2441; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2442; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2443; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2444; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2445; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2446; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2447; AVX512F-NEXT: retq 2448; 2449; AVX512VL-LABEL: load_cvt_8i16_to_4f64: 2450; AVX512VL: # BB#0: 2451; AVX512VL-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 2452; AVX512VL-NEXT: vpmovdw %xmm0, -{{[0-9]+}}(%rsp) 2453; AVX512VL-NEXT: movq -{{[0-9]+}}(%rsp), %rax 2454; AVX512VL-NEXT: movq %rax, %rcx 2455; 
AVX512VL-NEXT: movl %eax, %edx 2456; AVX512VL-NEXT: movswl %ax, %esi 2457; AVX512VL-NEXT: shrq $48, %rax 2458; AVX512VL-NEXT: shrq $32, %rcx 2459; AVX512VL-NEXT: shrl $16, %edx 2460; AVX512VL-NEXT: movswl %dx, %edx 2461; AVX512VL-NEXT: vmovd %edx, %xmm0 2462; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 2463; AVX512VL-NEXT: vmovd %esi, %xmm1 2464; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 2465; AVX512VL-NEXT: movswl %cx, %ecx 2466; AVX512VL-NEXT: vmovd %ecx, %xmm2 2467; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 2468; AVX512VL-NEXT: cwtl 2469; AVX512VL-NEXT: vmovd %eax, %xmm3 2470; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 2471; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2472; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2473; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2474; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2475; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2476; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0] 2477; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2478; AVX512VL-NEXT: retq 2479 %1 = load <8 x i16>, <8 x i16>* %a0 2480 %2 = shufflevector <8 x i16> %1, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 2481 %3 = bitcast <4 x i16> %2 to <4 x half> 2482 %4 = fpext <4 x half> %3 to <4 x double> 2483 ret <4 x double> %4 2484} 2485 2486define <8 x double> @load_cvt_8i16_to_8f64(<8 x i16>* %a0) nounwind { 2487; AVX1-LABEL: load_cvt_8i16_to_8f64: 2488; AVX1: # BB#0: 2489; AVX1-NEXT: movswl 8(%rdi), %eax 2490; AVX1-NEXT: vmovd %eax, %xmm0 2491; AVX1-NEXT: vcvtph2ps %xmm0, %xmm1 2492; AVX1-NEXT: movswl 10(%rdi), %eax 2493; AVX1-NEXT: vmovd %eax, %xmm0 2494; AVX1-NEXT: vcvtph2ps %xmm0, %xmm2 2495; AVX1-NEXT: movswl 12(%rdi), %eax 2496; AVX1-NEXT: vmovd %eax, %xmm0 2497; AVX1-NEXT: vcvtph2ps %xmm0, %xmm3 2498; AVX1-NEXT: movswl 14(%rdi), %eax 2499; AVX1-NEXT: vmovd %eax, %xmm0 2500; AVX1-NEXT: vcvtph2ps %xmm0, %xmm4 2501; AVX1-NEXT: movswl (%rdi), %eax 2502; AVX1-NEXT: vmovd %eax, %xmm0 2503; AVX1-NEXT: vcvtph2ps %xmm0, %xmm0 2504; 
AVX1-NEXT: movswl 2(%rdi), %eax 2505; AVX1-NEXT: vmovd %eax, %xmm5 2506; AVX1-NEXT: vcvtph2ps %xmm5, %xmm5 2507; AVX1-NEXT: movswl 4(%rdi), %eax 2508; AVX1-NEXT: vmovd %eax, %xmm6 2509; AVX1-NEXT: vcvtph2ps %xmm6, %xmm6 2510; AVX1-NEXT: movswl 6(%rdi), %eax 2511; AVX1-NEXT: vmovd %eax, %xmm7 2512; AVX1-NEXT: vcvtph2ps %xmm7, %xmm7 2513; AVX1-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 2514; AVX1-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 2515; AVX1-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] 2516; AVX1-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 2517; AVX1-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2518; AVX1-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0] 2519; AVX1-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 2520; AVX1-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 2521; AVX1-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2522; AVX1-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] 2523; AVX1-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2524; AVX1-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2525; AVX1-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 2526; AVX1-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2527; AVX1-NEXT: retq 2528; 2529; AVX2-LABEL: load_cvt_8i16_to_8f64: 2530; AVX2: # BB#0: 2531; AVX2-NEXT: movswl 8(%rdi), %eax 2532; AVX2-NEXT: vmovd %eax, %xmm0 2533; AVX2-NEXT: vcvtph2ps %xmm0, %xmm1 2534; AVX2-NEXT: movswl 10(%rdi), %eax 2535; AVX2-NEXT: vmovd %eax, %xmm0 2536; AVX2-NEXT: vcvtph2ps %xmm0, %xmm2 2537; AVX2-NEXT: movswl 12(%rdi), %eax 2538; AVX2-NEXT: vmovd %eax, %xmm0 2539; AVX2-NEXT: vcvtph2ps %xmm0, %xmm3 2540; AVX2-NEXT: movswl 14(%rdi), %eax 2541; AVX2-NEXT: vmovd %eax, %xmm0 2542; AVX2-NEXT: vcvtph2ps %xmm0, %xmm4 2543; AVX2-NEXT: movswl (%rdi), %eax 2544; AVX2-NEXT: vmovd %eax, %xmm0 2545; AVX2-NEXT: vcvtph2ps %xmm0, %xmm0 2546; AVX2-NEXT: movswl 2(%rdi), %eax 2547; AVX2-NEXT: vmovd %eax, %xmm5 2548; AVX2-NEXT: vcvtph2ps %xmm5, %xmm5 2549; AVX2-NEXT: movswl 4(%rdi), %eax 2550; AVX2-NEXT: vmovd %eax, %xmm6 2551; AVX2-NEXT: vcvtph2ps %xmm6, %xmm6 2552; AVX2-NEXT: movswl 6(%rdi), %eax 2553; AVX2-NEXT: vmovd %eax, 
%xmm7 2554; AVX2-NEXT: vcvtph2ps %xmm7, %xmm7 2555; AVX2-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 2556; AVX2-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 2557; AVX2-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] 2558; AVX2-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 2559; AVX2-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2560; AVX2-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm5[0] 2561; AVX2-NEXT: vinsertf128 $1, %xmm6, %ymm0, %ymm0 2562; AVX2-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 2563; AVX2-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2564; AVX2-NEXT: vunpcklpd {{.*#+}} xmm3 = xmm3[0],xmm4[0] 2565; AVX2-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2566; AVX2-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2567; AVX2-NEXT: vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm2[0] 2568; AVX2-NEXT: vinsertf128 $1, %xmm3, %ymm1, %ymm1 2569; AVX2-NEXT: retq 2570; 2571; AVX512F-LABEL: load_cvt_8i16_to_8f64: 2572; AVX512F: # BB#0: 2573; AVX512F-NEXT: movswl (%rdi), %eax 2574; AVX512F-NEXT: vmovd %eax, %xmm0 2575; AVX512F-NEXT: vcvtph2ps %ymm0, %zmm0 2576; AVX512F-NEXT: movswl 2(%rdi), %eax 2577; AVX512F-NEXT: vmovd %eax, %xmm1 2578; AVX512F-NEXT: vcvtph2ps %ymm1, %zmm1 2579; AVX512F-NEXT: movswl 4(%rdi), %eax 2580; AVX512F-NEXT: vmovd %eax, %xmm2 2581; AVX512F-NEXT: vcvtph2ps %ymm2, %zmm2 2582; AVX512F-NEXT: movswl 6(%rdi), %eax 2583; AVX512F-NEXT: vmovd %eax, %xmm3 2584; AVX512F-NEXT: vcvtph2ps %ymm3, %zmm3 2585; AVX512F-NEXT: movswl 8(%rdi), %eax 2586; AVX512F-NEXT: vmovd %eax, %xmm4 2587; AVX512F-NEXT: vcvtph2ps %ymm4, %zmm4 2588; AVX512F-NEXT: movswl 10(%rdi), %eax 2589; AVX512F-NEXT: vmovd %eax, %xmm5 2590; AVX512F-NEXT: vcvtph2ps %ymm5, %zmm5 2591; AVX512F-NEXT: movswl 12(%rdi), %eax 2592; AVX512F-NEXT: vmovd %eax, %xmm6 2593; AVX512F-NEXT: vcvtph2ps %ymm6, %zmm6 2594; AVX512F-NEXT: movswl 14(%rdi), %eax 2595; AVX512F-NEXT: vmovd %eax, %xmm7 2596; AVX512F-NEXT: vcvtph2ps %ymm7, %zmm7 2597; AVX512F-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 2598; AVX512F-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 2599; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] 
2600; AVX512F-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 2601; AVX512F-NEXT: vcvtss2sd %xmm4, %xmm4, %xmm4 2602; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0] 2603; AVX512F-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4 2604; AVX512F-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2605; AVX512F-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2606; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2607; AVX512F-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2608; AVX512F-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2609; AVX512F-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2610; AVX512F-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2611; AVX512F-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0 2612; AVX512F-NEXT: retq 2613; 2614; AVX512VL-LABEL: load_cvt_8i16_to_8f64: 2615; AVX512VL: # BB#0: 2616; AVX512VL-NEXT: movswl (%rdi), %eax 2617; AVX512VL-NEXT: vmovd %eax, %xmm0 2618; AVX512VL-NEXT: vcvtph2ps %xmm0, %xmm0 2619; AVX512VL-NEXT: movswl 2(%rdi), %eax 2620; AVX512VL-NEXT: vmovd %eax, %xmm1 2621; AVX512VL-NEXT: vcvtph2ps %xmm1, %xmm1 2622; AVX512VL-NEXT: movswl 4(%rdi), %eax 2623; AVX512VL-NEXT: vmovd %eax, %xmm2 2624; AVX512VL-NEXT: vcvtph2ps %xmm2, %xmm2 2625; AVX512VL-NEXT: movswl 6(%rdi), %eax 2626; AVX512VL-NEXT: vmovd %eax, %xmm3 2627; AVX512VL-NEXT: vcvtph2ps %xmm3, %xmm3 2628; AVX512VL-NEXT: movswl 8(%rdi), %eax 2629; AVX512VL-NEXT: vmovd %eax, %xmm4 2630; AVX512VL-NEXT: vcvtph2ps %xmm4, %xmm4 2631; AVX512VL-NEXT: movswl 10(%rdi), %eax 2632; AVX512VL-NEXT: vmovd %eax, %xmm5 2633; AVX512VL-NEXT: vcvtph2ps %xmm5, %xmm5 2634; AVX512VL-NEXT: movswl 12(%rdi), %eax 2635; AVX512VL-NEXT: vmovd %eax, %xmm6 2636; AVX512VL-NEXT: vcvtph2ps %xmm6, %xmm6 2637; AVX512VL-NEXT: movswl 14(%rdi), %eax 2638; AVX512VL-NEXT: vmovd %eax, %xmm7 2639; AVX512VL-NEXT: vcvtph2ps %xmm7, %xmm7 2640; AVX512VL-NEXT: vcvtss2sd %xmm7, %xmm7, %xmm7 2641; AVX512VL-NEXT: vcvtss2sd %xmm6, %xmm6, %xmm6 2642; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm6 = xmm6[0],xmm7[0] 2643; AVX512VL-NEXT: vcvtss2sd %xmm5, %xmm5, %xmm5 2644; AVX512VL-NEXT: 
vcvtss2sd %xmm4, %xmm4, %xmm4 2645; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm4 = xmm4[0],xmm5[0] 2646; AVX512VL-NEXT: vinsertf128 $1, %xmm6, %ymm4, %ymm4 2647; AVX512VL-NEXT: vcvtss2sd %xmm3, %xmm3, %xmm3 2648; AVX512VL-NEXT: vcvtss2sd %xmm2, %xmm2, %xmm2 2649; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2650; AVX512VL-NEXT: vcvtss2sd %xmm1, %xmm1, %xmm1 2651; AVX512VL-NEXT: vcvtss2sd %xmm0, %xmm0, %xmm0 2652; AVX512VL-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2653; AVX512VL-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 2654; AVX512VL-NEXT: vinsertf64x4 $1, %ymm4, %zmm0, %zmm0 2655; AVX512VL-NEXT: retq 2656 %1 = load <8 x i16>, <8 x i16>* %a0 2657 %2 = bitcast <8 x i16> %1 to <8 x half> 2658 %3 = fpext <8 x half> %2 to <8 x double> 2659 ret <8 x double> %3 2660} 2661 2662; 2663; Float to Half 2664; 2665 2666define i16 @cvt_f32_to_i16(float %a0) nounwind { 2667; AVX1-LABEL: cvt_f32_to_i16: 2668; AVX1: # BB#0: 2669; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0 2670; AVX1-NEXT: vmovd %xmm0, %eax 2671; AVX1-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> 2672; AVX1-NEXT: retq 2673; 2674; AVX2-LABEL: cvt_f32_to_i16: 2675; AVX2: # BB#0: 2676; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0 2677; AVX2-NEXT: vmovd %xmm0, %eax 2678; AVX2-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> 2679; AVX2-NEXT: retq 2680; 2681; AVX512F-LABEL: cvt_f32_to_i16: 2682; AVX512F: # BB#0: 2683; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> 2684; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 2685; AVX512F-NEXT: vmovd %xmm0, %eax 2686; AVX512F-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> 2687; AVX512F-NEXT: retq 2688; 2689; AVX512VL-LABEL: cvt_f32_to_i16: 2690; AVX512VL: # BB#0: 2691; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 2692; AVX512VL-NEXT: vmovd %xmm0, %eax 2693; AVX512VL-NEXT: # kill: %AX<def> %AX<kill> %EAX<kill> 2694; AVX512VL-NEXT: retq 2695 %1 = fptrunc float %a0 to half 2696 %2 = bitcast half %1 to i16 2697 ret i16 %2 2698} 2699 2700define <4 x i16> @cvt_4f32_to_4i16(<4 x float> %a0) 
nounwind { 2701; AVX1-LABEL: cvt_4f32_to_4i16: 2702; AVX1: # BB#0: 2703; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2704; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2705; AVX1-NEXT: vmovd %xmm1, %eax 2706; AVX1-NEXT: shll $16, %eax 2707; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm1 2708; AVX1-NEXT: vmovd %xmm1, %ecx 2709; AVX1-NEXT: movzwl %cx, %ecx 2710; AVX1-NEXT: orl %eax, %ecx 2711; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 2712; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2713; AVX1-NEXT: vmovd %xmm1, %eax 2714; AVX1-NEXT: shll $16, %eax 2715; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 2716; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0 2717; AVX1-NEXT: vmovd %xmm0, %edx 2718; AVX1-NEXT: movzwl %dx, %edx 2719; AVX1-NEXT: orl %eax, %edx 2720; AVX1-NEXT: shlq $32, %rdx 2721; AVX1-NEXT: orq %rcx, %rdx 2722; AVX1-NEXT: vmovq %rdx, %xmm0 2723; AVX1-NEXT: retq 2724; 2725; AVX2-LABEL: cvt_4f32_to_4i16: 2726; AVX2: # BB#0: 2727; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2728; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2729; AVX2-NEXT: vmovd %xmm1, %eax 2730; AVX2-NEXT: shll $16, %eax 2731; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm1 2732; AVX2-NEXT: vmovd %xmm1, %ecx 2733; AVX2-NEXT: movzwl %cx, %ecx 2734; AVX2-NEXT: orl %eax, %ecx 2735; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 2736; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2737; AVX2-NEXT: vmovd %xmm1, %eax 2738; AVX2-NEXT: shll $16, %eax 2739; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 2740; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0 2741; AVX2-NEXT: vmovd %xmm0, %edx 2742; AVX2-NEXT: movzwl %dx, %edx 2743; AVX2-NEXT: orl %eax, %edx 2744; AVX2-NEXT: shlq $32, %rdx 2745; AVX2-NEXT: orq %rcx, %rdx 2746; AVX2-NEXT: vmovq %rdx, %xmm0 2747; AVX2-NEXT: retq 2748; 2749; AVX512F-LABEL: cvt_4f32_to_4i16: 2750; AVX512F: # BB#0: 2751; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> 2752; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm1 2753; AVX512F-NEXT: vmovd %xmm1, %eax 2754; AVX512F-NEXT: movzwl %ax, %eax 2755; 
AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2756; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 2757; AVX512F-NEXT: vmovd %xmm1, %ecx 2758; AVX512F-NEXT: shll $16, %ecx 2759; AVX512F-NEXT: orl %eax, %ecx 2760; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 2761; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 2762; AVX512F-NEXT: vmovd %xmm1, %eax 2763; AVX512F-NEXT: movzwl %ax, %eax 2764; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 2765; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 2766; AVX512F-NEXT: vmovd %xmm0, %edx 2767; AVX512F-NEXT: shll $16, %edx 2768; AVX512F-NEXT: orl %eax, %edx 2769; AVX512F-NEXT: shlq $32, %rdx 2770; AVX512F-NEXT: orq %rcx, %rdx 2771; AVX512F-NEXT: vmovq %rdx, %xmm0 2772; AVX512F-NEXT: retq 2773; 2774; AVX512VL-LABEL: cvt_4f32_to_4i16: 2775; AVX512VL: # BB#0: 2776; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2777; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2778; AVX512VL-NEXT: vmovd %xmm1, %eax 2779; AVX512VL-NEXT: shll $16, %eax 2780; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm1 2781; AVX512VL-NEXT: vmovd %xmm1, %ecx 2782; AVX512VL-NEXT: movzwl %cx, %ecx 2783; AVX512VL-NEXT: orl %eax, %ecx 2784; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 2785; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2786; AVX512VL-NEXT: vmovd %xmm1, %eax 2787; AVX512VL-NEXT: shll $16, %eax 2788; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 2789; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 2790; AVX512VL-NEXT: vmovd %xmm0, %edx 2791; AVX512VL-NEXT: movzwl %dx, %edx 2792; AVX512VL-NEXT: orl %eax, %edx 2793; AVX512VL-NEXT: shlq $32, %rdx 2794; AVX512VL-NEXT: orq %rcx, %rdx 2795; AVX512VL-NEXT: vmovq %rdx, %xmm0 2796; AVX512VL-NEXT: retq 2797 %1 = fptrunc <4 x float> %a0 to <4 x half> 2798 %2 = bitcast <4 x half> %1 to <4 x i16> 2799 ret <4 x i16> %2 2800} 2801 2802define <8 x i16> @cvt_4f32_to_8i16_undef(<4 x float> %a0) nounwind { 2803; AVX1-LABEL: cvt_4f32_to_8i16_undef: 2804; AVX1: # BB#0: 2805; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = 
xmm0[1,1,3,3] 2806; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2807; AVX1-NEXT: vmovd %xmm1, %eax 2808; AVX1-NEXT: shll $16, %eax 2809; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm1 2810; AVX1-NEXT: vmovd %xmm1, %ecx 2811; AVX1-NEXT: movzwl %cx, %ecx 2812; AVX1-NEXT: orl %eax, %ecx 2813; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 2814; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2815; AVX1-NEXT: vmovd %xmm1, %eax 2816; AVX1-NEXT: shll $16, %eax 2817; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 2818; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0 2819; AVX1-NEXT: vmovd %xmm0, %edx 2820; AVX1-NEXT: movzwl %dx, %edx 2821; AVX1-NEXT: orl %eax, %edx 2822; AVX1-NEXT: shlq $32, %rdx 2823; AVX1-NEXT: orq %rcx, %rdx 2824; AVX1-NEXT: vmovq %rdx, %xmm0 2825; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 2826; AVX1-NEXT: retq 2827; 2828; AVX2-LABEL: cvt_4f32_to_8i16_undef: 2829; AVX2: # BB#0: 2830; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2831; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2832; AVX2-NEXT: vmovd %xmm1, %eax 2833; AVX2-NEXT: shll $16, %eax 2834; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm1 2835; AVX2-NEXT: vmovd %xmm1, %ecx 2836; AVX2-NEXT: movzwl %cx, %ecx 2837; AVX2-NEXT: orl %eax, %ecx 2838; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 2839; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2840; AVX2-NEXT: vmovd %xmm1, %eax 2841; AVX2-NEXT: shll $16, %eax 2842; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 2843; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0 2844; AVX2-NEXT: vmovd %xmm0, %edx 2845; AVX2-NEXT: movzwl %dx, %edx 2846; AVX2-NEXT: orl %eax, %edx 2847; AVX2-NEXT: shlq $32, %rdx 2848; AVX2-NEXT: orq %rcx, %rdx 2849; AVX2-NEXT: vmovq %rdx, %xmm0 2850; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 2851; AVX2-NEXT: retq 2852; 2853; AVX512F-LABEL: cvt_4f32_to_8i16_undef: 2854; AVX512F: # BB#0: 2855; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> 2856; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm1 2857; 
AVX512F-NEXT: vmovd %xmm1, %eax 2858; AVX512F-NEXT: movzwl %ax, %eax 2859; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2860; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 2861; AVX512F-NEXT: vmovd %xmm1, %ecx 2862; AVX512F-NEXT: shll $16, %ecx 2863; AVX512F-NEXT: orl %eax, %ecx 2864; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 2865; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 2866; AVX512F-NEXT: vmovd %xmm1, %eax 2867; AVX512F-NEXT: movzwl %ax, %eax 2868; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 2869; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 2870; AVX512F-NEXT: vmovd %xmm0, %edx 2871; AVX512F-NEXT: shll $16, %edx 2872; AVX512F-NEXT: orl %eax, %edx 2873; AVX512F-NEXT: shlq $32, %rdx 2874; AVX512F-NEXT: orq %rcx, %rdx 2875; AVX512F-NEXT: vmovq %rdx, %xmm0 2876; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 2877; AVX512F-NEXT: retq 2878; 2879; AVX512VL-LABEL: cvt_4f32_to_8i16_undef: 2880; AVX512VL: # BB#0: 2881; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2882; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2883; AVX512VL-NEXT: vmovd %xmm1, %eax 2884; AVX512VL-NEXT: shll $16, %eax 2885; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm1 2886; AVX512VL-NEXT: vmovd %xmm1, %ecx 2887; AVX512VL-NEXT: movzwl %cx, %ecx 2888; AVX512VL-NEXT: orl %eax, %ecx 2889; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 2890; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2891; AVX512VL-NEXT: vmovd %xmm1, %eax 2892; AVX512VL-NEXT: shll $16, %eax 2893; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 2894; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 2895; AVX512VL-NEXT: vmovd %xmm0, %edx 2896; AVX512VL-NEXT: movzwl %dx, %edx 2897; AVX512VL-NEXT: orl %eax, %edx 2898; AVX512VL-NEXT: shlq $32, %rdx 2899; AVX512VL-NEXT: orq %rcx, %rdx 2900; AVX512VL-NEXT: vmovq %rdx, %xmm0 2901; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 2902; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] 2903; AVX512VL-NEXT: vpshufd 
{{.*#+}} xmm0 = xmm0[0,2,2,3] 2904; AVX512VL-NEXT: retq 2905 %1 = fptrunc <4 x float> %a0 to <4 x half> 2906 %2 = bitcast <4 x half> %1 to <4 x i16> 2907 %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 2908 ret <8 x i16> %3 2909} 2910 2911define <8 x i16> @cvt_4f32_to_8i16_zero(<4 x float> %a0) nounwind { 2912; AVX1-LABEL: cvt_4f32_to_8i16_zero: 2913; AVX1: # BB#0: 2914; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2915; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2916; AVX1-NEXT: vmovd %xmm1, %eax 2917; AVX1-NEXT: shll $16, %eax 2918; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm1 2919; AVX1-NEXT: vmovd %xmm1, %ecx 2920; AVX1-NEXT: movzwl %cx, %ecx 2921; AVX1-NEXT: orl %eax, %ecx 2922; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 2923; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2924; AVX1-NEXT: vmovd %xmm1, %eax 2925; AVX1-NEXT: shll $16, %eax 2926; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 2927; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0 2928; AVX1-NEXT: vmovd %xmm0, %edx 2929; AVX1-NEXT: movzwl %dx, %edx 2930; AVX1-NEXT: orl %eax, %edx 2931; AVX1-NEXT: shlq $32, %rdx 2932; AVX1-NEXT: orq %rcx, %rdx 2933; AVX1-NEXT: vmovq %rdx, %xmm0 2934; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero 2935; AVX1-NEXT: retq 2936; 2937; AVX2-LABEL: cvt_4f32_to_8i16_zero: 2938; AVX2: # BB#0: 2939; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2940; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2941; AVX2-NEXT: vmovd %xmm1, %eax 2942; AVX2-NEXT: shll $16, %eax 2943; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm1 2944; AVX2-NEXT: vmovd %xmm1, %ecx 2945; AVX2-NEXT: movzwl %cx, %ecx 2946; AVX2-NEXT: orl %eax, %ecx 2947; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 2948; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2949; AVX2-NEXT: vmovd %xmm1, %eax 2950; AVX2-NEXT: shll $16, %eax 2951; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 2952; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0 2953; 
AVX2-NEXT: vmovd %xmm0, %edx 2954; AVX2-NEXT: movzwl %dx, %edx 2955; AVX2-NEXT: orl %eax, %edx 2956; AVX2-NEXT: shlq $32, %rdx 2957; AVX2-NEXT: orq %rcx, %rdx 2958; AVX2-NEXT: vmovq %rdx, %xmm0 2959; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero 2960; AVX2-NEXT: retq 2961; 2962; AVX512F-LABEL: cvt_4f32_to_8i16_zero: 2963; AVX512F: # BB#0: 2964; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> 2965; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm1 2966; AVX512F-NEXT: vmovd %xmm1, %eax 2967; AVX512F-NEXT: movzwl %ax, %eax 2968; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2969; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 2970; AVX512F-NEXT: vmovd %xmm1, %ecx 2971; AVX512F-NEXT: shll $16, %ecx 2972; AVX512F-NEXT: orl %eax, %ecx 2973; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 2974; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 2975; AVX512F-NEXT: vmovd %xmm1, %eax 2976; AVX512F-NEXT: movzwl %ax, %eax 2977; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 2978; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 2979; AVX512F-NEXT: vmovd %xmm0, %edx 2980; AVX512F-NEXT: shll $16, %edx 2981; AVX512F-NEXT: orl %eax, %edx 2982; AVX512F-NEXT: shlq $32, %rdx 2983; AVX512F-NEXT: orq %rcx, %rdx 2984; AVX512F-NEXT: vmovq %rdx, %xmm0 2985; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero 2986; AVX512F-NEXT: retq 2987; 2988; AVX512VL-LABEL: cvt_4f32_to_8i16_zero: 2989; AVX512VL: # BB#0: 2990; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 2991; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 2992; AVX512VL-NEXT: vmovd %xmm1, %eax 2993; AVX512VL-NEXT: shll $16, %eax 2994; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm1 2995; AVX512VL-NEXT: vmovd %xmm1, %ecx 2996; AVX512VL-NEXT: movzwl %cx, %ecx 2997; AVX512VL-NEXT: orl %eax, %ecx 2998; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 2999; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3000; AVX512VL-NEXT: vmovd %xmm1, %eax 
3001; AVX512VL-NEXT: shll $16, %eax 3002; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 3003; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3004; AVX512VL-NEXT: vmovd %xmm0, %edx 3005; AVX512VL-NEXT: movzwl %dx, %edx 3006; AVX512VL-NEXT: orl %eax, %edx 3007; AVX512VL-NEXT: shlq $32, %rdx 3008; AVX512VL-NEXT: orq %rcx, %rdx 3009; AVX512VL-NEXT: vmovq %rdx, %xmm0 3010; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 3011; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] 3012; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2] 3013; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 3014; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] 3015; AVX512VL-NEXT: retq 3016 %1 = fptrunc <4 x float> %a0 to <4 x half> 3017 %2 = bitcast <4 x half> %1 to <4 x i16> 3018 %3 = shufflevector <4 x i16> %2, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3019 ret <8 x i16> %3 3020} 3021 3022define <8 x i16> @cvt_8f32_to_8i16(<8 x float> %a0) nounwind { 3023; AVX1-LABEL: cvt_8f32_to_8i16: 3024; AVX1: # BB#0: 3025; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3026; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3027; AVX1-NEXT: vmovd %xmm1, %eax 3028; AVX1-NEXT: shll $16, %eax 3029; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3030; AVX1-NEXT: vmovd %xmm1, %ecx 3031; AVX1-NEXT: movzwl %cx, %ecx 3032; AVX1-NEXT: orl %eax, %ecx 3033; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3034; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3035; AVX1-NEXT: vmovd %xmm1, %edx 3036; AVX1-NEXT: shll $16, %edx 3037; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3038; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3039; AVX1-NEXT: vmovd %xmm1, %eax 3040; AVX1-NEXT: movzwl %ax, %eax 3041; AVX1-NEXT: orl %edx, %eax 3042; AVX1-NEXT: shlq $32, %rax 3043; AVX1-NEXT: orq %rcx, %rax 3044; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 3045; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3046; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3047; AVX1-NEXT: vmovd %xmm1, 
%ecx 3048; AVX1-NEXT: shll $16, %ecx 3049; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3050; AVX1-NEXT: vmovd %xmm1, %edx 3051; AVX1-NEXT: movzwl %dx, %edx 3052; AVX1-NEXT: orl %ecx, %edx 3053; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3054; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3055; AVX1-NEXT: vmovd %xmm1, %ecx 3056; AVX1-NEXT: shll $16, %ecx 3057; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 3058; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3059; AVX1-NEXT: vmovd %xmm0, %esi 3060; AVX1-NEXT: movzwl %si, %esi 3061; AVX1-NEXT: orl %ecx, %esi 3062; AVX1-NEXT: shlq $32, %rsi 3063; AVX1-NEXT: orq %rdx, %rsi 3064; AVX1-NEXT: vmovq %rsi, %xmm0 3065; AVX1-NEXT: vmovq %rax, %xmm1 3066; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 3067; AVX1-NEXT: vzeroupper 3068; AVX1-NEXT: retq 3069; 3070; AVX2-LABEL: cvt_8f32_to_8i16: 3071; AVX2: # BB#0: 3072; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3073; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3074; AVX2-NEXT: vmovd %xmm1, %eax 3075; AVX2-NEXT: shll $16, %eax 3076; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3077; AVX2-NEXT: vmovd %xmm1, %ecx 3078; AVX2-NEXT: movzwl %cx, %ecx 3079; AVX2-NEXT: orl %eax, %ecx 3080; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3081; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3082; AVX2-NEXT: vmovd %xmm1, %edx 3083; AVX2-NEXT: shll $16, %edx 3084; AVX2-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3085; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3086; AVX2-NEXT: vmovd %xmm1, %eax 3087; AVX2-NEXT: movzwl %ax, %eax 3088; AVX2-NEXT: orl %edx, %eax 3089; AVX2-NEXT: shlq $32, %rax 3090; AVX2-NEXT: orq %rcx, %rax 3091; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 3092; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3093; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3094; AVX2-NEXT: vmovd %xmm1, %ecx 3095; AVX2-NEXT: shll $16, %ecx 3096; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3097; AVX2-NEXT: vmovd %xmm1, %edx 3098; AVX2-NEXT: movzwl %dx, %edx 3099; AVX2-NEXT: orl %ecx, %edx 3100; AVX2-NEXT: vpermilps {{.*#+}} xmm1 
= xmm0[3,1,2,3] 3101; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3102; AVX2-NEXT: vmovd %xmm1, %ecx 3103; AVX2-NEXT: shll $16, %ecx 3104; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 3105; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3106; AVX2-NEXT: vmovd %xmm0, %esi 3107; AVX2-NEXT: movzwl %si, %esi 3108; AVX2-NEXT: orl %ecx, %esi 3109; AVX2-NEXT: shlq $32, %rsi 3110; AVX2-NEXT: orq %rdx, %rsi 3111; AVX2-NEXT: vmovq %rsi, %xmm0 3112; AVX2-NEXT: vmovq %rax, %xmm1 3113; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 3114; AVX2-NEXT: vzeroupper 3115; AVX2-NEXT: retq 3116; 3117; AVX512F-LABEL: cvt_8f32_to_8i16: 3118; AVX512F: # BB#0: 3119; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> 3120; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm1 3121; AVX512F-NEXT: vmovd %xmm1, %eax 3122; AVX512F-NEXT: movzwl %ax, %eax 3123; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3124; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3125; AVX512F-NEXT: vmovd %xmm1, %ecx 3126; AVX512F-NEXT: shll $16, %ecx 3127; AVX512F-NEXT: orl %eax, %ecx 3128; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3129; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3130; AVX512F-NEXT: vmovd %xmm1, %eax 3131; AVX512F-NEXT: movzwl %ax, %edx 3132; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3133; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3134; AVX512F-NEXT: vmovd %xmm1, %eax 3135; AVX512F-NEXT: shll $16, %eax 3136; AVX512F-NEXT: orl %edx, %eax 3137; AVX512F-NEXT: shlq $32, %rax 3138; AVX512F-NEXT: orq %rcx, %rax 3139; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 3140; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm1 3141; AVX512F-NEXT: vmovd %xmm1, %ecx 3142; AVX512F-NEXT: movzwl %cx, %ecx 3143; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3144; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3145; AVX512F-NEXT: vmovd %xmm1, %edx 3146; AVX512F-NEXT: shll $16, %edx 3147; AVX512F-NEXT: orl %ecx, %edx 3148; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3149; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3150; 
AVX512F-NEXT: vmovd %xmm1, %ecx 3151; AVX512F-NEXT: movzwl %cx, %ecx 3152; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 3153; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 3154; AVX512F-NEXT: vmovd %xmm0, %esi 3155; AVX512F-NEXT: shll $16, %esi 3156; AVX512F-NEXT: orl %ecx, %esi 3157; AVX512F-NEXT: shlq $32, %rsi 3158; AVX512F-NEXT: orq %rdx, %rsi 3159; AVX512F-NEXT: vmovq %rsi, %xmm0 3160; AVX512F-NEXT: vmovq %rax, %xmm1 3161; AVX512F-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 3162; AVX512F-NEXT: retq 3163; 3164; AVX512VL-LABEL: cvt_8f32_to_8i16: 3165; AVX512VL: # BB#0: 3166; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3167; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3168; AVX512VL-NEXT: vmovd %xmm1, %eax 3169; AVX512VL-NEXT: shll $16, %eax 3170; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3171; AVX512VL-NEXT: vmovd %xmm1, %ecx 3172; AVX512VL-NEXT: movzwl %cx, %ecx 3173; AVX512VL-NEXT: orl %eax, %ecx 3174; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3175; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3176; AVX512VL-NEXT: vmovd %xmm1, %edx 3177; AVX512VL-NEXT: shll $16, %edx 3178; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3179; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3180; AVX512VL-NEXT: vmovd %xmm1, %eax 3181; AVX512VL-NEXT: movzwl %ax, %eax 3182; AVX512VL-NEXT: orl %edx, %eax 3183; AVX512VL-NEXT: shlq $32, %rax 3184; AVX512VL-NEXT: orq %rcx, %rax 3185; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 3186; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3187; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3188; AVX512VL-NEXT: vmovd %xmm1, %ecx 3189; AVX512VL-NEXT: shll $16, %ecx 3190; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3191; AVX512VL-NEXT: vmovd %xmm1, %edx 3192; AVX512VL-NEXT: movzwl %dx, %edx 3193; AVX512VL-NEXT: orl %ecx, %edx 3194; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3195; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3196; AVX512VL-NEXT: vmovd %xmm1, %ecx 3197; AVX512VL-NEXT: shll $16, %ecx 3198; 
AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 3199; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3200; AVX512VL-NEXT: vmovd %xmm0, %esi 3201; AVX512VL-NEXT: movzwl %si, %esi 3202; AVX512VL-NEXT: orl %ecx, %esi 3203; AVX512VL-NEXT: shlq $32, %rsi 3204; AVX512VL-NEXT: orq %rdx, %rsi 3205; AVX512VL-NEXT: vmovq %rsi, %xmm0 3206; AVX512VL-NEXT: vmovq %rax, %xmm1 3207; AVX512VL-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 3208; AVX512VL-NEXT: retq 3209 %1 = fptrunc <8 x float> %a0 to <8 x half> 3210 %2 = bitcast <8 x half> %1 to <8 x i16> 3211 ret <8 x i16> %2 3212} 3213 3214define <16 x i16> @cvt_16f32_to_16i16(<16 x float> %a0) nounwind { 3215; AVX1-LABEL: cvt_16f32_to_16i16: 3216; AVX1: # BB#0: 3217; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm2 3218; AVX1-NEXT: vmovd %xmm2, %eax 3219; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 3220; AVX1-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3221; AVX1-NEXT: vmovd %eax, %xmm3 3222; AVX1-NEXT: vmovd %xmm2, %eax 3223; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 3224; AVX1-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3225; AVX1-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 3226; AVX1-NEXT: vmovd %xmm2, %eax 3227; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm2 3228; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] 3229; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3230; AVX1-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 3231; AVX1-NEXT: vmovd %xmm1, %eax 3232; AVX1-NEXT: vcvtps2ph $4, %xmm2, %xmm1 3233; AVX1-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3 3234; AVX1-NEXT: vmovd %xmm1, %eax 3235; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] 3236; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3237; AVX1-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 3238; AVX1-NEXT: vmovd %xmm1, %eax 3239; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0] 3240; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3241; AVX1-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3 3242; AVX1-NEXT: vmovd %xmm1, %eax 3243; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3244; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] 3245; AVX1-NEXT: vcvtps2ph $4, %xmm2, 
%xmm2 3246; AVX1-NEXT: vpinsrw $6, %eax, %xmm3, %xmm3 3247; AVX1-NEXT: vmovd %xmm2, %eax 3248; AVX1-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2 3249; AVX1-NEXT: vmovd %xmm1, %eax 3250; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3251; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3252; AVX1-NEXT: vmovd %eax, %xmm3 3253; AVX1-NEXT: vmovd %xmm1, %eax 3254; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3255; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3256; AVX1-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 3257; AVX1-NEXT: vmovd %xmm1, %eax 3258; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3259; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 3260; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3261; AVX1-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 3262; AVX1-NEXT: vmovd %xmm0, %eax 3263; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm0 3264; AVX1-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3 3265; AVX1-NEXT: vmovd %xmm0, %eax 3266; AVX1-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] 3267; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3268; AVX1-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 3269; AVX1-NEXT: vmovd %xmm0, %eax 3270; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[3,1,2,3] 3271; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3272; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 3273; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3274; AVX1-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3 3275; AVX1-NEXT: vmovd %xmm1, %eax 3276; AVX1-NEXT: vpinsrw $6, %eax, %xmm3, %xmm1 3277; AVX1-NEXT: vmovd %xmm0, %eax 3278; AVX1-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0 3279; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm0, %ymm0 3280; AVX1-NEXT: retq 3281; 3282; AVX2-LABEL: cvt_16f32_to_16i16: 3283; AVX2: # BB#0: 3284; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm2 3285; AVX2-NEXT: vmovd %xmm2, %eax 3286; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 3287; AVX2-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3288; AVX2-NEXT: vmovd %eax, %xmm3 3289; AVX2-NEXT: vmovd %xmm2, %eax 3290; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 3291; AVX2-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3292; AVX2-NEXT: vpinsrw $1, %eax, 
%xmm3, %xmm3 3293; AVX2-NEXT: vmovd %xmm2, %eax 3294; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm2 3295; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] 3296; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3297; AVX2-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 3298; AVX2-NEXT: vmovd %xmm1, %eax 3299; AVX2-NEXT: vcvtps2ph $4, %xmm2, %xmm1 3300; AVX2-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3 3301; AVX2-NEXT: vmovd %xmm1, %eax 3302; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] 3303; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3304; AVX2-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 3305; AVX2-NEXT: vmovd %xmm1, %eax 3306; AVX2-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0] 3307; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3308; AVX2-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3 3309; AVX2-NEXT: vmovd %xmm1, %eax 3310; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3311; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] 3312; AVX2-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3313; AVX2-NEXT: vpinsrw $6, %eax, %xmm3, %xmm3 3314; AVX2-NEXT: vmovd %xmm2, %eax 3315; AVX2-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2 3316; AVX2-NEXT: vmovd %xmm1, %eax 3317; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3318; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3319; AVX2-NEXT: vmovd %eax, %xmm3 3320; AVX2-NEXT: vmovd %xmm1, %eax 3321; AVX2-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3322; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3323; AVX2-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 3324; AVX2-NEXT: vmovd %xmm1, %eax 3325; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 3326; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 3327; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3328; AVX2-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 3329; AVX2-NEXT: vmovd %xmm0, %eax 3330; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm0 3331; AVX2-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3 3332; AVX2-NEXT: vmovd %xmm0, %eax 3333; AVX2-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] 3334; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3335; AVX2-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 3336; AVX2-NEXT: vmovd %xmm0, %eax 3337; AVX2-NEXT: 
vpermilps {{.*#+}} xmm0 = xmm1[3,1,2,3] 3338; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3339; AVX2-NEXT: vpermilpd {{.*#+}} xmm1 = xmm1[1,0] 3340; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3341; AVX2-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3 3342; AVX2-NEXT: vmovd %xmm1, %eax 3343; AVX2-NEXT: vpinsrw $6, %eax, %xmm3, %xmm1 3344; AVX2-NEXT: vmovd %xmm0, %eax 3345; AVX2-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0 3346; AVX2-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 3347; AVX2-NEXT: retq 3348; 3349; AVX512F-LABEL: cvt_16f32_to_16i16: 3350; AVX512F: # BB#0: 3351; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm1 3352; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm2 3353; AVX512F-NEXT: vmovd %xmm2, %eax 3354; AVX512F-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 3355; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2 3356; AVX512F-NEXT: vmovd %eax, %xmm3 3357; AVX512F-NEXT: vmovd %xmm2, %eax 3358; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 3359; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2 3360; AVX512F-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 3361; AVX512F-NEXT: vmovd %xmm2, %eax 3362; AVX512F-NEXT: vextractf128 $1, %ymm1, %xmm2 3363; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] 3364; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3365; AVX512F-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 3366; AVX512F-NEXT: vmovd %xmm1, %eax 3367; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm1 3368; AVX512F-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3 3369; AVX512F-NEXT: vmovd %xmm1, %eax 3370; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] 3371; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3372; AVX512F-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 3373; AVX512F-NEXT: vmovd %xmm1, %eax 3374; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0] 3375; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3376; AVX512F-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3 3377; AVX512F-NEXT: vmovd %xmm1, %eax 3378; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm1 3379; AVX512F-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] 3380; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2 3381; AVX512F-NEXT: 
vpinsrw $6, %eax, %xmm3, %xmm3 3382; AVX512F-NEXT: vmovd %xmm2, %eax 3383; AVX512F-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2 3384; AVX512F-NEXT: vmovd %xmm1, %eax 3385; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3386; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3387; AVX512F-NEXT: vmovd %eax, %xmm3 3388; AVX512F-NEXT: vmovd %xmm1, %eax 3389; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3390; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3391; AVX512F-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 3392; AVX512F-NEXT: vmovd %xmm1, %eax 3393; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1 3394; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 3395; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 3396; AVX512F-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 3397; AVX512F-NEXT: vmovd %xmm0, %eax 3398; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm0 3399; AVX512F-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3 3400; AVX512F-NEXT: vmovd %xmm0, %eax 3401; AVX512F-NEXT: vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3] 3402; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 3403; AVX512F-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 3404; AVX512F-NEXT: vmovd %xmm0, %eax 3405; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm1[1,0] 3406; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 3407; AVX512F-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3 3408; AVX512F-NEXT: vmovd %xmm0, %eax 3409; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm1[3,1,2,3] 3410; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 3411; AVX512F-NEXT: vpinsrw $6, %eax, %xmm3, %xmm1 3412; AVX512F-NEXT: vmovd %xmm0, %eax 3413; AVX512F-NEXT: vpinsrw $7, %eax, %xmm1, %xmm0 3414; AVX512F-NEXT: vinserti128 $1, %xmm2, %ymm0, %ymm0 3415; AVX512F-NEXT: retq 3416; 3417; AVX512VL-LABEL: cvt_16f32_to_16i16: 3418; AVX512VL: # BB#0: 3419; AVX512VL-NEXT: vextractf64x4 $1, %zmm0, %ymm1 3420; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm2 3421; AVX512VL-NEXT: vmovd %xmm2, %eax 3422; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 3423; AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3424; AVX512VL-NEXT: vmovd %eax, %xmm3 3425; 
AVX512VL-NEXT: vmovd %xmm2, %eax 3426; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 3427; AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3428; AVX512VL-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 3429; AVX512VL-NEXT: vmovd %xmm2, %eax 3430; AVX512VL-NEXT: vextractf128 $1, %ymm1, %xmm2 3431; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm1[3,1,2,3] 3432; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3433; AVX512VL-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 3434; AVX512VL-NEXT: vmovd %xmm1, %eax 3435; AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm1 3436; AVX512VL-NEXT: vpinsrw $3, %eax, %xmm3, %xmm3 3437; AVX512VL-NEXT: vmovd %xmm1, %eax 3438; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] 3439; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3440; AVX512VL-NEXT: vpinsrw $4, %eax, %xmm3, %xmm3 3441; AVX512VL-NEXT: vmovd %xmm1, %eax 3442; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm2[1,0] 3443; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3444; AVX512VL-NEXT: vpinsrw $5, %eax, %xmm3, %xmm3 3445; AVX512VL-NEXT: vmovd %xmm1, %eax 3446; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3447; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm2[3,1,2,3] 3448; AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3449; AVX512VL-NEXT: vpinsrw $6, %eax, %xmm3, %xmm3 3450; AVX512VL-NEXT: vmovd %xmm2, %eax 3451; AVX512VL-NEXT: vpinsrw $7, %eax, %xmm3, %xmm2 3452; AVX512VL-NEXT: vmovd %xmm1, %eax 3453; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3454; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3455; AVX512VL-NEXT: vmovd %eax, %xmm3 3456; AVX512VL-NEXT: vmovd %xmm1, %eax 3457; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3458; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3459; AVX512VL-NEXT: vpinsrw $1, %eax, %xmm3, %xmm3 3460; AVX512VL-NEXT: vmovd %xmm1, %eax 3461; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1 3462; AVX512VL-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 3463; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3464; AVX512VL-NEXT: vpinsrw $2, %eax, %xmm3, %xmm3 3465; AVX512VL-NEXT: vmovd %xmm0, %eax 3466; 
AVX512VL-NEXT:    vcvtps2ph $4, %xmm1, %xmm0
; AVX512VL-NEXT:    vpinsrw $3, %eax, %xmm3, %xmm3
; AVX512VL-NEXT:    vmovd %xmm0, %eax
; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm0 = xmm1[1,1,3,3]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX512VL-NEXT:    vpinsrw $4, %eax, %xmm3, %xmm3
; AVX512VL-NEXT:    vmovd %xmm0, %eax
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm1[1,0]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX512VL-NEXT:    vpinsrw $5, %eax, %xmm3, %xmm3
; AVX512VL-NEXT:    vmovd %xmm0, %eax
; AVX512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm1[3,1,2,3]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX512VL-NEXT:    vpinsrw $6, %eax, %xmm3, %xmm1
; AVX512VL-NEXT:    vmovd %xmm0, %eax
; AVX512VL-NEXT:    vpinsrw $7, %eax, %xmm1, %xmm0
; AVX512VL-NEXT:    vinserti128 $1, %xmm2, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %1 = fptrunc <16 x float> %a0 to <16 x half>
  %2 = bitcast <16 x half> %1 to <16 x i16>
  ret <16 x i16> %2
}

;
; Float to Half (Store)
;

; store_cvt_f32_to_i16: fptrunc a single float to half, then store the raw
; 16-bit pattern to memory. Every prefix lowers to VCVTPS2PH + a word store;
; AVX512F without VL only has the 512-bit form, hence the zmm source, ymm
; destination and the register "kill" annotation.
; NOTE(review): all CHECK lines in this file are autogenerated by
; utils/update_llc_test_checks.py (see file header) -- regenerate with that
; script rather than hand-editing them.
define void @store_cvt_f32_to_i16(float %a0, i16* %a1) nounwind {
; AVX1-LABEL: store_cvt_f32_to_i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %eax
; AVX1-NEXT:    movw %ax, (%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: store_cvt_f32_to_i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    movw %ax, (%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: store_cvt_f32_to_i16:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; AVX512F-NEXT:    vmovd %xmm0, %eax
; AVX512F-NEXT:    movw %ax, (%rdi)
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: store_cvt_f32_to_i16:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovd %xmm0, %eax
; AVX512VL-NEXT:    movw %ax, (%rdi)
; AVX512VL-NEXT:    retq
  %1 = fptrunc float %a0 to half
  %2 = bitcast half %1 to i16
  store i16 %2, i16* %a1
  ret void
}

; store_cvt_4f32_to_4i16: truncate a <4 x float> to <4 x half> and store it as
; <4 x i16>. Each lane is isolated with a shuffle (vmovshdup / vpermilpd /
; vpermilps), converted individually with VCVTPS2PH, moved to a GPR, and
; stored as a separate 16-bit word.
define void @store_cvt_4f32_to_4i16(<4 x float> %a0, <4 x i16>* %a1) nounwind {
; AVX1-LABEL: store_cvt_4f32_to_4i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX1-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; AVX1-NEXT:    vmovd %xmm1, %eax
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; AVX1-NEXT:    vmovd %xmm1, %ecx
; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX1-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; AVX1-NEXT:    vmovd %xmm1, %edx
; AVX1-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX1-NEXT:    vmovd %xmm0, %esi
; AVX1-NEXT:    movw %si, (%rdi)
; AVX1-NEXT:    movw %dx, 6(%rdi)
; AVX1-NEXT:    movw %cx, 4(%rdi)
; AVX1-NEXT:    movw %ax, 2(%rdi)
; AVX1-NEXT:    retq
;
; AVX2-LABEL: store_cvt_4f32_to_4i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX2-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; AVX2-NEXT:    vmovd %xmm1, %eax
; AVX2-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX2-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; AVX2-NEXT:    vmovd %xmm1, %ecx
; AVX2-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3]
; AVX2-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; AVX2-NEXT:    vmovd %xmm1, %edx
; AVX2-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX2-NEXT:    vmovd %xmm0, %esi
; AVX2-NEXT:    movw %si, (%rdi)
; AVX2-NEXT:    movw %dx, 6(%rdi)
; AVX2-NEXT:    movw %cx, 4(%rdi)
; AVX2-NEXT:    movw %ax, 2(%rdi)
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: store_cvt_4f32_to_4i16:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<def>
; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3]
; AVX512F-NEXT:    vcvtps2ph $4, %zmm1, %ymm1
; AVX512F-NEXT:    vmovd %xmm1, %eax
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
;
AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3575; AVX512F-NEXT: vmovd %xmm1, %ecx 3576; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3577; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3578; AVX512F-NEXT: vmovd %xmm1, %edx 3579; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 3580; AVX512F-NEXT: vmovd %xmm0, %esi 3581; AVX512F-NEXT: movw %si, (%rdi) 3582; AVX512F-NEXT: movw %dx, 6(%rdi) 3583; AVX512F-NEXT: movw %cx, 4(%rdi) 3584; AVX512F-NEXT: movw %ax, 2(%rdi) 3585; AVX512F-NEXT: retq 3586; 3587; AVX512VL-LABEL: store_cvt_4f32_to_4i16: 3588; AVX512VL: # BB#0: 3589; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3590; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3591; AVX512VL-NEXT: vmovd %xmm1, %eax 3592; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3593; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3594; AVX512VL-NEXT: vmovd %xmm1, %ecx 3595; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3596; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3597; AVX512VL-NEXT: vmovd %xmm1, %edx 3598; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3599; AVX512VL-NEXT: vmovd %xmm0, %esi 3600; AVX512VL-NEXT: movw %si, (%rdi) 3601; AVX512VL-NEXT: movw %dx, 6(%rdi) 3602; AVX512VL-NEXT: movw %cx, 4(%rdi) 3603; AVX512VL-NEXT: movw %ax, 2(%rdi) 3604; AVX512VL-NEXT: retq 3605 %1 = fptrunc <4 x float> %a0 to <4 x half> 3606 %2 = bitcast <4 x half> %1 to <4 x i16> 3607 store <4 x i16> %2, <4 x i16>* %a1 3608 ret void 3609} 3610 3611define void @store_cvt_4f32_to_8i16_undef(<4 x float> %a0, <8 x i16>* %a1) nounwind { 3612; AVX1-LABEL: store_cvt_4f32_to_8i16_undef: 3613; AVX1: # BB#0: 3614; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3615; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3616; AVX1-NEXT: vmovd %xmm1, %eax 3617; AVX1-NEXT: shll $16, %eax 3618; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3619; AVX1-NEXT: vmovd %xmm1, %ecx 3620; AVX1-NEXT: movzwl %cx, %ecx 3621; AVX1-NEXT: orl %eax, %ecx 3622; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3623; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 
3624; AVX1-NEXT: vmovd %xmm1, %eax 3625; AVX1-NEXT: shll $16, %eax 3626; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 3627; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3628; AVX1-NEXT: vmovd %xmm0, %edx 3629; AVX1-NEXT: movzwl %dx, %edx 3630; AVX1-NEXT: orl %eax, %edx 3631; AVX1-NEXT: shlq $32, %rdx 3632; AVX1-NEXT: orq %rcx, %rdx 3633; AVX1-NEXT: vmovq %rdx, %xmm0 3634; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 3635; AVX1-NEXT: vmovdqa %xmm0, (%rdi) 3636; AVX1-NEXT: retq 3637; 3638; AVX2-LABEL: store_cvt_4f32_to_8i16_undef: 3639; AVX2: # BB#0: 3640; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3641; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3642; AVX2-NEXT: vmovd %xmm1, %eax 3643; AVX2-NEXT: shll $16, %eax 3644; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3645; AVX2-NEXT: vmovd %xmm1, %ecx 3646; AVX2-NEXT: movzwl %cx, %ecx 3647; AVX2-NEXT: orl %eax, %ecx 3648; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3649; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3650; AVX2-NEXT: vmovd %xmm1, %eax 3651; AVX2-NEXT: shll $16, %eax 3652; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 3653; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3654; AVX2-NEXT: vmovd %xmm0, %edx 3655; AVX2-NEXT: movzwl %dx, %edx 3656; AVX2-NEXT: orl %eax, %edx 3657; AVX2-NEXT: shlq $32, %rdx 3658; AVX2-NEXT: orq %rcx, %rdx 3659; AVX2-NEXT: vmovq %rdx, %xmm0 3660; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 3661; AVX2-NEXT: vmovdqa %xmm0, (%rdi) 3662; AVX2-NEXT: retq 3663; 3664; AVX512F-LABEL: store_cvt_4f32_to_8i16_undef: 3665; AVX512F: # BB#0: 3666; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> 3667; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm1 3668; AVX512F-NEXT: vmovd %xmm1, %eax 3669; AVX512F-NEXT: movzwl %ax, %eax 3670; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3671; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3672; AVX512F-NEXT: vmovd %xmm1, %ecx 3673; AVX512F-NEXT: shll $16, %ecx 3674; AVX512F-NEXT: orl %eax, %ecx 3675; 
AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3676; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3677; AVX512F-NEXT: vmovd %xmm1, %eax 3678; AVX512F-NEXT: movzwl %ax, %eax 3679; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 3680; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 3681; AVX512F-NEXT: vmovd %xmm0, %edx 3682; AVX512F-NEXT: shll $16, %edx 3683; AVX512F-NEXT: orl %eax, %edx 3684; AVX512F-NEXT: shlq $32, %rdx 3685; AVX512F-NEXT: orq %rcx, %rdx 3686; AVX512F-NEXT: vmovq %rdx, %xmm0 3687; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 3688; AVX512F-NEXT: vmovdqa %xmm0, (%rdi) 3689; AVX512F-NEXT: retq 3690; 3691; AVX512VL-LABEL: store_cvt_4f32_to_8i16_undef: 3692; AVX512VL: # BB#0: 3693; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3694; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3695; AVX512VL-NEXT: vmovd %xmm1, %eax 3696; AVX512VL-NEXT: shll $16, %eax 3697; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3698; AVX512VL-NEXT: vmovd %xmm1, %ecx 3699; AVX512VL-NEXT: movzwl %cx, %ecx 3700; AVX512VL-NEXT: orl %eax, %ecx 3701; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3702; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3703; AVX512VL-NEXT: vmovd %xmm1, %eax 3704; AVX512VL-NEXT: shll $16, %eax 3705; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 3706; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3707; AVX512VL-NEXT: vmovd %xmm0, %edx 3708; AVX512VL-NEXT: movzwl %dx, %edx 3709; AVX512VL-NEXT: orl %eax, %edx 3710; AVX512VL-NEXT: shlq $32, %rdx 3711; AVX512VL-NEXT: orq %rcx, %rdx 3712; AVX512VL-NEXT: vmovq %rdx, %xmm0 3713; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 3714; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] 3715; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 3716; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi) 3717; AVX512VL-NEXT: retq 3718 %1 = fptrunc <4 x float> %a0 to <4 x half> 3719 %2 = bitcast <4 x half> %1 to <4 x i16> 3720 %3 = shufflevector <4 x i16> %2, <4 x i16> 
undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3721 store <8 x i16> %3, <8 x i16>* %a1 3722 ret void 3723} 3724 3725define void @store_cvt_4f32_to_8i16_zero(<4 x float> %a0, <8 x i16>* %a1) nounwind { 3726; AVX1-LABEL: store_cvt_4f32_to_8i16_zero: 3727; AVX1: # BB#0: 3728; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3729; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3730; AVX1-NEXT: vmovd %xmm1, %eax 3731; AVX1-NEXT: shll $16, %eax 3732; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3733; AVX1-NEXT: vmovd %xmm1, %ecx 3734; AVX1-NEXT: movzwl %cx, %ecx 3735; AVX1-NEXT: orl %eax, %ecx 3736; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3737; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3738; AVX1-NEXT: vmovd %xmm1, %eax 3739; AVX1-NEXT: shll $16, %eax 3740; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 3741; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3742; AVX1-NEXT: vmovd %xmm0, %edx 3743; AVX1-NEXT: movzwl %dx, %edx 3744; AVX1-NEXT: orl %eax, %edx 3745; AVX1-NEXT: shlq $32, %rdx 3746; AVX1-NEXT: orq %rcx, %rdx 3747; AVX1-NEXT: vmovq %rdx, %xmm0 3748; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero 3749; AVX1-NEXT: vmovdqa %xmm0, (%rdi) 3750; AVX1-NEXT: retq 3751; 3752; AVX2-LABEL: store_cvt_4f32_to_8i16_zero: 3753; AVX2: # BB#0: 3754; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3755; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3756; AVX2-NEXT: vmovd %xmm1, %eax 3757; AVX2-NEXT: shll $16, %eax 3758; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3759; AVX2-NEXT: vmovd %xmm1, %ecx 3760; AVX2-NEXT: movzwl %cx, %ecx 3761; AVX2-NEXT: orl %eax, %ecx 3762; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3763; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3764; AVX2-NEXT: vmovd %xmm1, %eax 3765; AVX2-NEXT: shll $16, %eax 3766; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 3767; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3768; AVX2-NEXT: vmovd %xmm0, %edx 3769; AVX2-NEXT: movzwl %dx, %edx 3770; AVX2-NEXT: orl %eax, %edx 3771; 
AVX2-NEXT: shlq $32, %rdx 3772; AVX2-NEXT: orq %rcx, %rdx 3773; AVX2-NEXT: vmovq %rdx, %xmm0 3774; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero 3775; AVX2-NEXT: vmovdqa %xmm0, (%rdi) 3776; AVX2-NEXT: retq 3777; 3778; AVX512F-LABEL: store_cvt_4f32_to_8i16_zero: 3779; AVX512F: # BB#0: 3780; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> 3781; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm1 3782; AVX512F-NEXT: vmovd %xmm1, %eax 3783; AVX512F-NEXT: movzwl %ax, %eax 3784; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3785; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3786; AVX512F-NEXT: vmovd %xmm1, %ecx 3787; AVX512F-NEXT: shll $16, %ecx 3788; AVX512F-NEXT: orl %eax, %ecx 3789; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3790; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3791; AVX512F-NEXT: vmovd %xmm1, %eax 3792; AVX512F-NEXT: movzwl %ax, %eax 3793; AVX512F-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 3794; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 3795; AVX512F-NEXT: vmovd %xmm0, %edx 3796; AVX512F-NEXT: shll $16, %edx 3797; AVX512F-NEXT: orl %eax, %edx 3798; AVX512F-NEXT: shlq $32, %rdx 3799; AVX512F-NEXT: orq %rcx, %rdx 3800; AVX512F-NEXT: vmovq %rdx, %xmm0 3801; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero 3802; AVX512F-NEXT: vmovdqa %xmm0, (%rdi) 3803; AVX512F-NEXT: retq 3804; 3805; AVX512VL-LABEL: store_cvt_4f32_to_8i16_zero: 3806; AVX512VL: # BB#0: 3807; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3808; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3809; AVX512VL-NEXT: vmovd %xmm1, %eax 3810; AVX512VL-NEXT: shll $16, %eax 3811; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm1 3812; AVX512VL-NEXT: vmovd %xmm1, %ecx 3813; AVX512VL-NEXT: movzwl %cx, %ecx 3814; AVX512VL-NEXT: orl %eax, %ecx 3815; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3816; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3817; AVX512VL-NEXT: vmovd %xmm1, %eax 3818; 
AVX512VL-NEXT: shll $16, %eax 3819; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 3820; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3821; AVX512VL-NEXT: vmovd %xmm0, %edx 3822; AVX512VL-NEXT: movzwl %dx, %edx 3823; AVX512VL-NEXT: orl %eax, %edx 3824; AVX512VL-NEXT: shlq $32, %rdx 3825; AVX512VL-NEXT: orq %rcx, %rdx 3826; AVX512VL-NEXT: vmovq %rdx, %xmm0 3827; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 3828; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] 3829; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2] 3830; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 3831; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] 3832; AVX512VL-NEXT: vmovdqa %xmm0, (%rdi) 3833; AVX512VL-NEXT: retq 3834 %1 = fptrunc <4 x float> %a0 to <4 x half> 3835 %2 = bitcast <4 x half> %1 to <4 x i16> 3836 %3 = shufflevector <4 x i16> %2, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 3837 store <8 x i16> %3, <8 x i16>* %a1 3838 ret void 3839} 3840 3841define void @store_cvt_8f32_to_8i16(<8 x float> %a0, <8 x i16>* %a1) nounwind { 3842; AVX1-LABEL: store_cvt_8f32_to_8i16: 3843; AVX1: # BB#0: 3844; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3845; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3846; AVX1-NEXT: vmovd %xmm1, %r8d 3847; AVX1-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3848; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3849; AVX1-NEXT: vmovd %xmm1, %r9d 3850; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3851; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3852; AVX1-NEXT: vmovd %xmm1, %r10d 3853; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 3854; AVX1-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 3855; AVX1-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3856; AVX1-NEXT: vmovd %xmm2, %r11d 3857; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 3858; AVX1-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3859; AVX1-NEXT: vmovd %xmm2, %eax 3860; AVX1-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3] 3861; AVX1-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3862; 
AVX1-NEXT: vmovd %xmm2, %ecx 3863; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3864; AVX1-NEXT: vmovd %xmm0, %edx 3865; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm0 3866; AVX1-NEXT: vmovd %xmm0, %esi 3867; AVX1-NEXT: movw %si, 8(%rdi) 3868; AVX1-NEXT: movw %dx, (%rdi) 3869; AVX1-NEXT: movw %cx, 14(%rdi) 3870; AVX1-NEXT: movw %ax, 12(%rdi) 3871; AVX1-NEXT: movw %r11w, 10(%rdi) 3872; AVX1-NEXT: movw %r10w, 6(%rdi) 3873; AVX1-NEXT: movw %r9w, 4(%rdi) 3874; AVX1-NEXT: movw %r8w, 2(%rdi) 3875; AVX1-NEXT: vzeroupper 3876; AVX1-NEXT: retq 3877; 3878; AVX2-LABEL: store_cvt_8f32_to_8i16: 3879; AVX2: # BB#0: 3880; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3881; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3882; AVX2-NEXT: vmovd %xmm1, %r8d 3883; AVX2-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3884; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3885; AVX2-NEXT: vmovd %xmm1, %r9d 3886; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3887; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3888; AVX2-NEXT: vmovd %xmm1, %r10d 3889; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm1 3890; AVX2-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 3891; AVX2-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3892; AVX2-NEXT: vmovd %xmm2, %r11d 3893; AVX2-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 3894; AVX2-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3895; AVX2-NEXT: vmovd %xmm2, %eax 3896; AVX2-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3] 3897; AVX2-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3898; AVX2-NEXT: vmovd %xmm2, %ecx 3899; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3900; AVX2-NEXT: vmovd %xmm0, %edx 3901; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm0 3902; AVX2-NEXT: vmovd %xmm0, %esi 3903; AVX2-NEXT: movw %si, 8(%rdi) 3904; AVX2-NEXT: movw %dx, (%rdi) 3905; AVX2-NEXT: movw %cx, 14(%rdi) 3906; AVX2-NEXT: movw %ax, 12(%rdi) 3907; AVX2-NEXT: movw %r11w, 10(%rdi) 3908; AVX2-NEXT: movw %r10w, 6(%rdi) 3909; AVX2-NEXT: movw %r9w, 4(%rdi) 3910; AVX2-NEXT: movw %r8w, 2(%rdi) 3911; AVX2-NEXT: vzeroupper 3912; AVX2-NEXT: retq 3913; 3914; AVX512F-LABEL: store_cvt_8f32_to_8i16: 
3915; AVX512F: # BB#0: 3916; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<def> 3917; AVX512F-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3918; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3919; AVX512F-NEXT: vmovd %xmm1, %r8d 3920; AVX512F-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3921; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3922; AVX512F-NEXT: vmovd %xmm1, %r9d 3923; AVX512F-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3924; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm1 3925; AVX512F-NEXT: vmovd %xmm1, %r10d 3926; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm1 3927; AVX512F-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 3928; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2 3929; AVX512F-NEXT: vmovd %xmm2, %r11d 3930; AVX512F-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 3931; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2 3932; AVX512F-NEXT: vmovd %xmm2, %eax 3933; AVX512F-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3] 3934; AVX512F-NEXT: vcvtps2ph $4, %zmm2, %ymm2 3935; AVX512F-NEXT: vmovd %xmm2, %ecx 3936; AVX512F-NEXT: vcvtps2ph $4, %zmm0, %ymm0 3937; AVX512F-NEXT: vmovd %xmm0, %edx 3938; AVX512F-NEXT: vcvtps2ph $4, %zmm1, %ymm0 3939; AVX512F-NEXT: vmovd %xmm0, %esi 3940; AVX512F-NEXT: movw %si, 8(%rdi) 3941; AVX512F-NEXT: movw %dx, (%rdi) 3942; AVX512F-NEXT: movw %cx, 14(%rdi) 3943; AVX512F-NEXT: movw %ax, 12(%rdi) 3944; AVX512F-NEXT: movw %r11w, 10(%rdi) 3945; AVX512F-NEXT: movw %r10w, 6(%rdi) 3946; AVX512F-NEXT: movw %r9w, 4(%rdi) 3947; AVX512F-NEXT: movw %r8w, 2(%rdi) 3948; AVX512F-NEXT: retq 3949; 3950; AVX512VL-LABEL: store_cvt_8f32_to_8i16: 3951; AVX512VL: # BB#0: 3952; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm1 = xmm0[1,1,3,3] 3953; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3954; AVX512VL-NEXT: vmovd %xmm1, %r8d 3955; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm1 = xmm0[1,0] 3956; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3957; AVX512VL-NEXT: vmovd %xmm1, %r9d 3958; AVX512VL-NEXT: vpermilps {{.*#+}} xmm1 = xmm0[3,1,2,3] 3959; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm1 3960; 
AVX512VL-NEXT: vmovd %xmm1, %r10d 3961; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm1 3962; AVX512VL-NEXT: vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3] 3963; AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3964; AVX512VL-NEXT: vmovd %xmm2, %r11d 3965; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm2 = xmm1[1,0] 3966; AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3967; AVX512VL-NEXT: vmovd %xmm2, %eax 3968; AVX512VL-NEXT: vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3] 3969; AVX512VL-NEXT: vcvtps2ph $4, %xmm2, %xmm2 3970; AVX512VL-NEXT: vmovd %xmm2, %ecx 3971; AVX512VL-NEXT: vcvtps2ph $4, %xmm0, %xmm0 3972; AVX512VL-NEXT: vmovd %xmm0, %edx 3973; AVX512VL-NEXT: vcvtps2ph $4, %xmm1, %xmm0 3974; AVX512VL-NEXT: vmovd %xmm0, %esi 3975; AVX512VL-NEXT: movw %si, 8(%rdi) 3976; AVX512VL-NEXT: movw %dx, (%rdi) 3977; AVX512VL-NEXT: movw %cx, 14(%rdi) 3978; AVX512VL-NEXT: movw %ax, 12(%rdi) 3979; AVX512VL-NEXT: movw %r11w, 10(%rdi) 3980; AVX512VL-NEXT: movw %r10w, 6(%rdi) 3981; AVX512VL-NEXT: movw %r9w, 4(%rdi) 3982; AVX512VL-NEXT: movw %r8w, 2(%rdi) 3983; AVX512VL-NEXT: retq 3984 %1 = fptrunc <8 x float> %a0 to <8 x half> 3985 %2 = bitcast <8 x half> %1 to <8 x i16> 3986 store <8 x i16> %2, <8 x i16>* %a1 3987 ret void 3988} 3989 3990define void @store_cvt_16f32_to_16i16(<16 x float> %a0, <16 x i16>* %a1) nounwind { 3991; AVX1-LABEL: store_cvt_16f32_to_16i16: 3992; AVX1: # BB#0: 3993; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm2 3994; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm3 3995; AVX1-NEXT: vcvtps2ph $4, %xmm3, %xmm4 3996; AVX1-NEXT: vmovd %xmm4, %eax 3997; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm4 3998; AVX1-NEXT: movw %ax, 24(%rdi) 3999; AVX1-NEXT: vmovd %xmm4, %eax 4000; AVX1-NEXT: vcvtps2ph $4, %xmm2, %xmm4 4001; AVX1-NEXT: movw %ax, 16(%rdi) 4002; AVX1-NEXT: vmovd %xmm4, %eax 4003; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm4 4004; AVX1-NEXT: movw %ax, 8(%rdi) 4005; AVX1-NEXT: vmovd %xmm4, %eax 4006; AVX1-NEXT: vpermilps {{.*#+}} xmm4 = xmm3[3,1,2,3] 4007; AVX1-NEXT: vcvtps2ph $4, %xmm4, %xmm4 4008; AVX1-NEXT: movw 
%ax, (%rdi) 4009; AVX1-NEXT: vmovd %xmm4, %eax 4010; AVX1-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0] 4011; AVX1-NEXT: vcvtps2ph $4, %xmm4, %xmm4 4012; AVX1-NEXT: movw %ax, 30(%rdi) 4013; AVX1-NEXT: vmovd %xmm4, %eax 4014; AVX1-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] 4015; AVX1-NEXT: vcvtps2ph $4, %xmm4, %xmm4 4016; AVX1-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3] 4017; AVX1-NEXT: vcvtps2ph $4, %xmm3, %xmm3 4018; AVX1-NEXT: movw %ax, 28(%rdi) 4019; AVX1-NEXT: vmovd %xmm3, %eax 4020; AVX1-NEXT: vpermilps {{.*#+}} xmm3 = xmm1[3,1,2,3] 4021; AVX1-NEXT: vcvtps2ph $4, %xmm3, %xmm3 4022; AVX1-NEXT: movw %ax, 26(%rdi) 4023; AVX1-NEXT: vmovd %xmm3, %eax 4024; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] 4025; AVX1-NEXT: vcvtps2ph $4, %xmm3, %xmm3 4026; AVX1-NEXT: movw %ax, 22(%rdi) 4027; AVX1-NEXT: vmovd %xmm3, %eax 4028; AVX1-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] 4029; AVX1-NEXT: vcvtps2ph $4, %xmm3, %xmm3 4030; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 4031; AVX1-NEXT: vcvtps2ph $4, %xmm0, %xmm0 4032; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3] 4033; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 4034; AVX1-NEXT: movw %ax, 20(%rdi) 4035; AVX1-NEXT: vmovd %xmm1, %eax 4036; AVX1-NEXT: vpermilps {{.*#+}} xmm1 = xmm2[3,1,2,3] 4037; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 4038; AVX1-NEXT: movw %ax, 18(%rdi) 4039; AVX1-NEXT: vmovd %xmm1, %eax 4040; AVX1-NEXT: vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3] 4041; AVX1-NEXT: vcvtps2ph $4, %xmm1, %xmm1 4042; AVX1-NEXT: vpermilpd {{.*#+}} xmm2 = xmm2[1,0] 4043; AVX1-NEXT: vcvtps2ph $4, %xmm2, %xmm2 4044; AVX1-NEXT: movw %ax, 14(%rdi) 4045; AVX1-NEXT: vmovd %xmm2, %eax 4046; AVX1-NEXT: movw %ax, 12(%rdi) 4047; AVX1-NEXT: vmovd %xmm1, %eax 4048; AVX1-NEXT: movw %ax, 10(%rdi) 4049; AVX1-NEXT: vmovd %xmm0, %eax 4050; AVX1-NEXT: movw %ax, 6(%rdi) 4051; AVX1-NEXT: vmovd %xmm3, %eax 4052; AVX1-NEXT: movw %ax, 4(%rdi) 4053; AVX1-NEXT: vmovd %xmm4, %eax 4054; AVX1-NEXT: movw %ax, 2(%rdi) 4055; AVX1-NEXT: vzeroupper 4056; 
AVX1-NEXT: retq 4057; 4058; AVX2-LABEL: store_cvt_16f32_to_16i16: 4059; AVX2: # BB#0: 4060; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm2 4061; AVX2-NEXT: vextractf128 $1, %ymm1, %xmm3 4062; AVX2-NEXT: vcvtps2ph $4, %xmm3, %xmm4 4063; AVX2-NEXT: vmovd %xmm4, %eax 4064; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm4 4065; AVX2-NEXT: movw %ax, 24(%rdi) 4066; AVX2-NEXT: vmovd %xmm4, %eax 4067; AVX2-NEXT: vcvtps2ph $4, %xmm2, %xmm4 4068; AVX2-NEXT: movw %ax, 16(%rdi) 4069; AVX2-NEXT: vmovd %xmm4, %eax 4070; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm4 4071; AVX2-NEXT: movw %ax, 8(%rdi) 4072; AVX2-NEXT: vmovd %xmm4, %eax 4073; AVX2-NEXT: vpermilps {{.*#+}} xmm4 = xmm3[3,1,2,3] 4074; AVX2-NEXT: vcvtps2ph $4, %xmm4, %xmm4 4075; AVX2-NEXT: movw %ax, (%rdi) 4076; AVX2-NEXT: vmovd %xmm4, %eax 4077; AVX2-NEXT: vpermilpd {{.*#+}} xmm4 = xmm3[1,0] 4078; AVX2-NEXT: vcvtps2ph $4, %xmm4, %xmm4 4079; AVX2-NEXT: movw %ax, 30(%rdi) 4080; AVX2-NEXT: vmovd %xmm4, %eax 4081; AVX2-NEXT: vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3] 4082; AVX2-NEXT: vcvtps2ph $4, %xmm4, %xmm4 4083; AVX2-NEXT: vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3] 4084; AVX2-NEXT: vcvtps2ph $4, %xmm3, %xmm3 4085; AVX2-NEXT: movw %ax, 28(%rdi) 4086; AVX2-NEXT: vmovd %xmm3, %eax 4087; AVX2-NEXT: vpermilps {{.*#+}} xmm3 = xmm1[3,1,2,3] 4088; AVX2-NEXT: vcvtps2ph $4, %xmm3, %xmm3 4089; AVX2-NEXT: movw %ax, 26(%rdi) 4090; AVX2-NEXT: vmovd %xmm3, %eax 4091; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm1[1,0] 4092; AVX2-NEXT: vcvtps2ph $4, %xmm3, %xmm3 4093; AVX2-NEXT: movw %ax, 22(%rdi) 4094; AVX2-NEXT: vmovd %xmm3, %eax 4095; AVX2-NEXT: vpermilpd {{.*#+}} xmm3 = xmm0[1,0] 4096; AVX2-NEXT: vcvtps2ph $4, %xmm3, %xmm3 4097; AVX2-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3] 4098; AVX2-NEXT: vcvtps2ph $4, %xmm0, %xmm0 4099; AVX2-NEXT: vmovshdup {{.*#+}} xmm1 = xmm1[1,1,3,3] 4100; AVX2-NEXT: vcvtps2ph $4, %xmm1, %xmm1 4101; AVX2-NEXT: movw %ax, 20(%rdi) 4102; AVX2-NEXT: vmovd %xmm1, %eax 4103; AVX2-NEXT: vpermilps {{.*#+}} xmm1 = xmm2[3,1,2,3] 4104; AVX2-NEXT: 
vcvtps2ph $4, %xmm1, %xmm1
; AVX2-NEXT:    movw %ax, 18(%rdi)
; AVX2-NEXT:    vmovd %xmm1, %eax
; AVX2-NEXT:    vmovshdup {{.*#+}} xmm1 = xmm2[1,1,3,3]
; AVX2-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; AVX2-NEXT:    vpermilpd {{.*#+}} xmm2 = xmm2[1,0]
; AVX2-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
; AVX2-NEXT:    movw %ax, 14(%rdi)
; AVX2-NEXT:    vmovd %xmm2, %eax
; AVX2-NEXT:    movw %ax, 12(%rdi)
; AVX2-NEXT:    vmovd %xmm1, %eax
; AVX2-NEXT:    movw %ax, 10(%rdi)
; AVX2-NEXT:    vmovd %xmm0, %eax
; AVX2-NEXT:    movw %ax, 6(%rdi)
; AVX2-NEXT:    vmovd %xmm3, %eax
; AVX2-NEXT:    movw %ax, 4(%rdi)
; AVX2-NEXT:    vmovd %xmm4, %eax
; AVX2-NEXT:    movw %ax, 2(%rdi)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: store_cvt_16f32_to_16i16:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512F-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
; AVX512F-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX512F-NEXT:    vcvtps2ph $4, %zmm3, %ymm4
; AVX512F-NEXT:    vmovd %xmm4, %eax
; AVX512F-NEXT:    vcvtps2ph $4, %zmm2, %ymm4
; AVX512F-NEXT:    movw %ax, 24(%rdi)
; AVX512F-NEXT:    vmovd %xmm4, %eax
; AVX512F-NEXT:    vcvtps2ph $4, %zmm1, %ymm4
; AVX512F-NEXT:    movw %ax, 16(%rdi)
; AVX512F-NEXT:    vmovd %xmm4, %eax
; AVX512F-NEXT:    vcvtps2ph $4, %zmm0, %ymm4
; AVX512F-NEXT:    movw %ax, 8(%rdi)
; AVX512F-NEXT:    vmovd %xmm4, %eax
; AVX512F-NEXT:    vpermilps {{.*#+}} xmm4 = xmm3[3,1,2,3]
; AVX512F-NEXT:    vcvtps2ph $4, %zmm4, %ymm4
; AVX512F-NEXT:    movw %ax, (%rdi)
; AVX512F-NEXT:    vmovd %xmm4, %eax
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX512F-NEXT:    vcvtps2ph $4, %zmm4, %ymm4
; AVX512F-NEXT:    movw %ax, 30(%rdi)
; AVX512F-NEXT:    vmovd %xmm4, %eax
; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; AVX512F-NEXT:    vcvtps2ph $4, %zmm4, %ymm4
; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
; AVX512F-NEXT:    vcvtps2ph $4, %zmm3, %ymm3
; AVX512F-NEXT:    movw %ax, 28(%rdi)
; AVX512F-NEXT:    vmovd %xmm3, %eax
; AVX512F-NEXT:    vpermilps {{.*#+}} xmm3 = xmm2[3,1,2,3]
; AVX512F-NEXT:    vcvtps2ph $4, %zmm3, %ymm3
; AVX512F-NEXT:    movw %ax, 26(%rdi)
; AVX512F-NEXT:    vmovd %xmm3, %eax
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512F-NEXT:    vcvtps2ph $4, %zmm3, %ymm3
; AVX512F-NEXT:    movw %ax, 22(%rdi)
; AVX512F-NEXT:    vmovd %xmm3, %eax
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512F-NEXT:    vcvtps2ph $4, %zmm3, %ymm3
; AVX512F-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512F-NEXT:    vcvtps2ph $4, %zmm0, %ymm0
; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX512F-NEXT:    vcvtps2ph $4, %zmm2, %ymm2
; AVX512F-NEXT:    movw %ax, 20(%rdi)
; AVX512F-NEXT:    vmovd %xmm2, %eax
; AVX512F-NEXT:    vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3]
; AVX512F-NEXT:    vcvtps2ph $4, %zmm2, %ymm2
; AVX512F-NEXT:    movw %ax, 18(%rdi)
; AVX512F-NEXT:    vmovd %xmm2, %eax
; AVX512F-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX512F-NEXT:    vcvtps2ph $4, %zmm2, %ymm2
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512F-NEXT:    vcvtps2ph $4, %zmm1, %ymm1
; AVX512F-NEXT:    movw %ax, 14(%rdi)
; AVX512F-NEXT:    vmovd %xmm1, %eax
; AVX512F-NEXT:    movw %ax, 12(%rdi)
; AVX512F-NEXT:    vmovd %xmm2, %eax
; AVX512F-NEXT:    movw %ax, 10(%rdi)
; AVX512F-NEXT:    vmovd %xmm0, %eax
; AVX512F-NEXT:    movw %ax, 6(%rdi)
; AVX512F-NEXT:    vmovd %xmm3, %eax
; AVX512F-NEXT:    movw %ax, 4(%rdi)
; AVX512F-NEXT:    vmovd %xmm4, %eax
; AVX512F-NEXT:    movw %ax, 2(%rdi)
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: store_cvt_16f32_to_16i16:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX512VL-NEXT:    vextractf64x4 $1, %zmm0, %ymm2
; AVX512VL-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm3, %xmm4
; AVX512VL-NEXT:    vmovd %xmm4, %eax
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm2, %xmm4
; AVX512VL-NEXT:    movw %ax, 24(%rdi)
; AVX512VL-NEXT:    vmovd %xmm4, %eax
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm1, %xmm4
; AVX512VL-NEXT:    movw %ax, 16(%rdi)
; AVX512VL-NEXT:    vmovd %xmm4, %eax
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm0, %xmm4
; AVX512VL-NEXT:    movw %ax, 8(%rdi)
; AVX512VL-NEXT:    vmovd %xmm4, %eax
; AVX512VL-NEXT:    vpermilps {{.*#+}} xmm4 = xmm3[3,1,2,3]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm4, %xmm4
; AVX512VL-NEXT:    movw %ax, (%rdi)
; AVX512VL-NEXT:    vmovd %xmm4, %eax
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm4 = xmm3[1,0]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm4, %xmm4
; AVX512VL-NEXT:    movw %ax, 30(%rdi)
; AVX512VL-NEXT:    vmovd %xmm4, %eax
; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm4 = xmm0[1,1,3,3]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm4, %xmm4
; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm3 = xmm3[1,1,3,3]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm3, %xmm3
; AVX512VL-NEXT:    movw %ax, 28(%rdi)
; AVX512VL-NEXT:    vmovd %xmm3, %eax
; AVX512VL-NEXT:    vpermilps {{.*#+}} xmm3 = xmm2[3,1,2,3]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm3, %xmm3
; AVX512VL-NEXT:    movw %ax, 26(%rdi)
; AVX512VL-NEXT:    vmovd %xmm3, %eax
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm2[1,0]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm3, %xmm3
; AVX512VL-NEXT:    movw %ax, 22(%rdi)
; AVX512VL-NEXT:    vmovd %xmm3, %eax
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm3 = xmm0[1,0]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm3, %xmm3
; AVX512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm0, %xmm0
; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm2[1,1,3,3]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
; AVX512VL-NEXT:    movw %ax, 20(%rdi)
; AVX512VL-NEXT:    vmovd %xmm2, %eax
; AVX512VL-NEXT:    vpermilps {{.*#+}} xmm2 = xmm1[3,1,2,3]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
; AVX512VL-NEXT:    movw %ax, 18(%rdi)
; AVX512VL-NEXT:    vmovd %xmm2, %eax
; AVX512VL-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm1[1,1,3,3]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm2, %xmm2
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX512VL-NEXT:    vcvtps2ph $4, %xmm1, %xmm1
; AVX512VL-NEXT:    movw %ax, 14(%rdi)
; AVX512VL-NEXT:    vmovd %xmm1, %eax
; AVX512VL-NEXT:    movw %ax, 12(%rdi)
; AVX512VL-NEXT:    vmovd %xmm2, %eax
; AVX512VL-NEXT:    movw %ax, 10(%rdi)
; AVX512VL-NEXT:    vmovd %xmm0, %eax
; AVX512VL-NEXT:    movw %ax, 6(%rdi)
; AVX512VL-NEXT:    vmovd %xmm3, %eax
; AVX512VL-NEXT:    movw %ax, 4(%rdi)
; AVX512VL-NEXT:    vmovd %xmm4, %eax
; AVX512VL-NEXT:    movw %ax, 2(%rdi)
; AVX512VL-NEXT:    retq
  %1 = fptrunc <16 x float> %a0 to <16 x half>
  %2 = bitcast <16 x half> %1 to <16 x i16>
  store <16 x i16> %2, <16 x i16>* %a1
  ret void
}

;
; Double to Half
;

define i16 @cvt_f64_to_i16(double %a0) nounwind {
; ALL-LABEL: cvt_f64_to_i16:
; ALL:       # BB#0:
; ALL-NEXT:    jmp __truncdfhf2 # TAILCALL
  %1 = fptrunc double %a0 to half
  %2 = bitcast half %1 to i16
  ret i16 %2
}

define <2 x i16> @cvt_2f64_to_2i16(<2 x double> %a0) nounwind {
; ALL-LABEL: cvt_2f64_to_2i16:
; ALL:       # BB#0:
; ALL-NEXT:    pushq %rbx
; ALL-NEXT:    subq $16, %rsp
; ALL-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; ALL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; ALL-NEXT:    callq __truncdfhf2
; ALL-NEXT:    movw %ax, %bx
; ALL-NEXT:    shll $16, %ebx
; ALL-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; ALL-NEXT:    callq __truncdfhf2
; ALL-NEXT:    movzwl %ax, %eax
; ALL-NEXT:    orl %ebx, %eax
; ALL-NEXT:    vmovd %eax, %xmm0
; ALL-NEXT:    addq $16, %rsp
; ALL-NEXT:    popq %rbx
; ALL-NEXT:    retq
  %1 = fptrunc <2 x double> %a0 to <2 x half>
  %2 = bitcast <2 x half> %1 to <2 x i16>
  ret <2 x i16> %2
}

define <4 x i16> @cvt_4f64_to_4i16(<4 x double> %a0) nounwind {
; AVX1-LABEL: cvt_4f64_to_4i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %r14
; AVX1-NEXT:    pushq %rbx
; AVX1-NEXT:    subq $40, %rsp
; AVX1-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movw %ax, %bx
; AVX1-NEXT:    shll $16, %ebx
; AVX1-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movzwl %ax, %r14d
; AVX1-NEXT:    orl %ebx, %r14d
; AVX1-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movw %ax, %bx
; AVX1-NEXT:    shll $16, %ebx
; AVX1-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movzwl %ax, %eax
; AVX1-NEXT:    orl %ebx, %eax
; AVX1-NEXT:    shlq $32, %rax
; AVX1-NEXT:    orq %r14, %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    addq $40, %rsp
; AVX1-NEXT:    popq %rbx
; AVX1-NEXT:    popq %r14
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cvt_4f64_to_4i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %r14
; AVX2-NEXT:    pushq %rbx
; AVX2-NEXT:    subq $40, %rsp
; AVX2-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movw %ax, %bx
; AVX2-NEXT:    shll $16, %ebx
; AVX2-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movzwl %ax, %r14d
; AVX2-NEXT:    orl %ebx, %r14d
; AVX2-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movw %ax, %bx
; AVX2-NEXT:    shll $16, %ebx
; AVX2-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movzwl %ax, %eax
; AVX2-NEXT:    orl %ebx, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    orq %r14, %rax
; AVX2-NEXT:    vmovq %rax, %xmm0
; AVX2-NEXT:    addq $40, %rsp
; AVX2-NEXT:    popq %rbx
; AVX2-NEXT:    popq %r14
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: cvt_4f64_to_4i16:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    pushq %r14
; AVX512F-NEXT:    pushq %rbx
; AVX512F-NEXT:    subq $40, %rsp
; AVX512F-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movw %ax, %bx
; AVX512F-NEXT:    shll $16, %ebx
; AVX512F-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movzwl %ax, %r14d
; AVX512F-NEXT:    orl %ebx, %r14d
; AVX512F-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movw %ax, %bx
; AVX512F-NEXT:    shll $16, %ebx
; AVX512F-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movzwl %ax, %eax
; AVX512F-NEXT:    orl %ebx, %eax
; AVX512F-NEXT:    shlq $32, %rax
; AVX512F-NEXT:    orq %r14, %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    addq $40, %rsp
; AVX512F-NEXT:    popq %rbx
; AVX512F-NEXT:    popq %r14
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: cvt_4f64_to_4i16:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    pushq %r14
; AVX512VL-NEXT:    pushq %rbx
; AVX512VL-NEXT:    subq $40, %rsp
; AVX512VL-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movw %ax, %bx
; AVX512VL-NEXT:    shll $16, %ebx
; AVX512VL-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movzwl %ax, %r14d
; AVX512VL-NEXT:    orl %ebx, %r14d
; AVX512VL-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movw %ax, %bx
; AVX512VL-NEXT:    shll $16, %ebx
; AVX512VL-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movzwl %ax, %eax
; AVX512VL-NEXT:    orl %ebx, %eax
; AVX512VL-NEXT:    shlq $32, %rax
; AVX512VL-NEXT:    orq %r14, %rax
; AVX512VL-NEXT:    vmovq %rax, %xmm0
; AVX512VL-NEXT:    addq $40, %rsp
; AVX512VL-NEXT:    popq %rbx
; AVX512VL-NEXT:    popq %r14
; AVX512VL-NEXT:    retq
  %1 = fptrunc <4 x double> %a0 to <4 x half>
  %2 = bitcast <4 x half> %1 to <4 x i16>
  ret <4 x i16> %2
}

define <8 x i16> @cvt_4f64_to_8i16_undef(<4 x double> %a0) nounwind {
; AVX1-LABEL: cvt_4f64_to_8i16_undef:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %r14
; AVX1-NEXT:    pushq %rbx
; AVX1-NEXT:    subq $40, %rsp
; AVX1-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movw %ax, %bx
; AVX1-NEXT:    shll $16, %ebx
; AVX1-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movzwl %ax, %r14d
; AVX1-NEXT:    orl %ebx, %r14d
; AVX1-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movw %ax, %bx
; AVX1-NEXT:    shll $16, %ebx
; AVX1-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movzwl %ax, %eax
; AVX1-NEXT:    orl %ebx, %eax
; AVX1-NEXT:    shlq $32, %rax
; AVX1-NEXT:    orq %r14, %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX1-NEXT:    addq $40, %rsp
; AVX1-NEXT:    popq %rbx
; AVX1-NEXT:    popq %r14
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cvt_4f64_to_8i16_undef:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %r14
; AVX2-NEXT:    pushq %rbx
; AVX2-NEXT:    subq $40, %rsp
; AVX2-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movw %ax, %bx
; AVX2-NEXT:    shll $16, %ebx
; AVX2-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movzwl %ax, %r14d
; AVX2-NEXT:    orl %ebx, %r14d
; AVX2-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movw %ax, %bx
; AVX2-NEXT:    shll $16, %ebx
; AVX2-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movzwl %ax, %eax
; AVX2-NEXT:    orl %ebx, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    orq %r14, %rax
; AVX2-NEXT:    vmovq %rax, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX2-NEXT:    addq $40, %rsp
; AVX2-NEXT:    popq %rbx
; AVX2-NEXT:    popq %r14
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: cvt_4f64_to_8i16_undef:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    pushq %r14
; AVX512F-NEXT:    pushq %rbx
; AVX512F-NEXT:    subq $40, %rsp
; AVX512F-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movw %ax, %bx
; AVX512F-NEXT:    shll $16, %ebx
; AVX512F-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movzwl %ax, %r14d
; AVX512F-NEXT:    orl %ebx, %r14d
; AVX512F-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movw %ax, %bx
; AVX512F-NEXT:    shll $16, %ebx
; AVX512F-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movzwl %ax, %eax
; AVX512F-NEXT:    orl %ebx, %eax
; AVX512F-NEXT:    shlq $32, %rax
; AVX512F-NEXT:    orq %r14, %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15]
; AVX512F-NEXT:    addq $40, %rsp
; AVX512F-NEXT:    popq %rbx
; AVX512F-NEXT:    popq %r14
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: cvt_4f64_to_8i16_undef:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    pushq %r14
; AVX512VL-NEXT:    pushq %rbx
; AVX512VL-NEXT:    subq $40, %rsp
; AVX512VL-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movw %ax, %bx
; AVX512VL-NEXT:    shll $16, %ebx
; AVX512VL-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movzwl %ax, %r14d
; AVX512VL-NEXT:    orl %ebx, %r14d
; AVX512VL-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movw %ax, %bx
; AVX512VL-NEXT:    shll $16, %ebx
; AVX512VL-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movzwl %ax, %eax
; AVX512VL-NEXT:    orl %ebx, %eax
; AVX512VL-NEXT:    shlq $32, %rax
; AVX512VL-NEXT:    orq %r14, %rax
; AVX512VL-NEXT:    vmovq %rax, %xmm0
; AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3]
; AVX512VL-NEXT:    addq $40, %rsp
; AVX512VL-NEXT:    popq %rbx
; AVX512VL-NEXT:    popq %r14
; AVX512VL-NEXT:    retq
  %1 = fptrunc <4 x double> %a0 to <4 x half>
  %2 = bitcast <4 x half> %1 to <4 x i16>
  %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %3
}

define <8 x i16> @cvt_4f64_to_8i16_zero(<4 x double> %a0) nounwind {
; AVX1-LABEL: cvt_4f64_to_8i16_zero:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %r14
; AVX1-NEXT:    pushq %rbx
; AVX1-NEXT:    subq $40, %rsp
; AVX1-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movw %ax, %bx
; AVX1-NEXT:    shll $16, %ebx
; AVX1-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movzwl %ax, %r14d
; AVX1-NEXT:    orl %ebx, %r14d
; AVX1-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movw %ax, %bx
; AVX1-NEXT:    shll $16, %ebx
; AVX1-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movzwl %ax, %eax
; AVX1-NEXT:    orl %ebx, %eax
; AVX1-NEXT:    shlq $32, %rax
; AVX1-NEXT:    orq %r14, %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX1-NEXT:    addq $40, %rsp
; AVX1-NEXT:    popq %rbx
; AVX1-NEXT:    popq %r14
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cvt_4f64_to_8i16_zero:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %r14
; AVX2-NEXT:    pushq %rbx
; AVX2-NEXT:    subq $40, %rsp
; AVX2-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movw %ax, %bx
; AVX2-NEXT:    shll $16, %ebx
; AVX2-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movzwl %ax, %r14d
; AVX2-NEXT:    orl %ebx, %r14d
; AVX2-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movw %ax, %bx
; AVX2-NEXT:    shll $16, %ebx
; AVX2-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movzwl %ax, %eax
; AVX2-NEXT:    orl %ebx, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    orq %r14, %rax
; AVX2-NEXT:    vmovq %rax, %xmm0
; AVX2-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    addq $40, %rsp
; AVX2-NEXT:    popq %rbx
; AVX2-NEXT:    popq %r14
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: cvt_4f64_to_8i16_zero:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    pushq %r14
; AVX512F-NEXT:    pushq %rbx
; AVX512F-NEXT:    subq $40, %rsp
; AVX512F-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movw %ax, %bx
; AVX512F-NEXT:    shll $16, %ebx
; AVX512F-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movzwl %ax, %r14d
; AVX512F-NEXT:    orl %ebx, %r14d
; AVX512F-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movw %ax, %bx
; AVX512F-NEXT:    shll $16, %ebx
; AVX512F-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movzwl %ax, %eax
; AVX512F-NEXT:    orl %ebx, %eax
; AVX512F-NEXT:    shlq $32, %rax
; AVX512F-NEXT:    orq %r14, %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512F-NEXT:    addq $40, %rsp
; AVX512F-NEXT:    popq %rbx
; AVX512F-NEXT:    popq %r14
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: cvt_4f64_to_8i16_zero:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    pushq %r14
; AVX512VL-NEXT:    pushq %rbx
; AVX512VL-NEXT:    subq $40, %rsp
; AVX512VL-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movw %ax, %bx
; AVX512VL-NEXT:    shll $16, %ebx
; AVX512VL-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movzwl %ax, %r14d
; AVX512VL-NEXT:    orl %ebx, %r14d
; AVX512VL-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movw %ax, %bx
; AVX512VL-NEXT:    shll $16, %ebx
; AVX512VL-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movzwl %ax, %eax
; AVX512VL-NEXT:    orl %ebx, %eax
; AVX512VL-NEXT:    shlq $32, %rax
; AVX512VL-NEXT:    orq %r14, %rax
; AVX512VL-NEXT:    vmovq %rax, %xmm0
; AVX512VL-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7]
; AVX512VL-NEXT:    vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7]
; AVX512VL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2]
; AVX512VL-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512VL-NEXT:    vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1]
; AVX512VL-NEXT:    addq $40, %rsp
; AVX512VL-NEXT:    popq %rbx
; AVX512VL-NEXT:    popq %r14
; AVX512VL-NEXT:    retq
  %1 = fptrunc <4 x double> %a0 to <4 x half>
  %2 = bitcast <4 x half> %1 to <4 x i16>
  %3 = shufflevector <4 x i16> %2, <4 x i16> zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %3
}

define <8 x i16> @cvt_8f64_to_8i16(<8 x double> %a0) nounwind {
; AVX1-LABEL: cvt_8f64_to_8i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %r15
; AVX1-NEXT:    pushq %r14
; AVX1-NEXT:    pushq %rbx
; AVX1-NEXT:    subq $64, %rsp
; AVX1-NEXT:    vmovups %ymm1, (%rsp) # 32-byte Spill
; AVX1-NEXT:    vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movw %ax, %bx
; AVX1-NEXT:    shll $16, %ebx
; AVX1-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movzwl %ax, %r15d
; AVX1-NEXT:    orl %ebx, %r15d
; AVX1-NEXT:    vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movw %ax, %bx
; AVX1-NEXT:    shll $16, %ebx
; AVX1-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movzwl %ax, %r14d
; AVX1-NEXT:    orl %ebx, %r14d
; AVX1-NEXT:    shlq $32, %r14
; AVX1-NEXT:    orq %r15, %r14
; AVX1-NEXT:    vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
; AVX1-NEXT:    # xmm0 = mem[1,0]
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movw %ax, %bx
; AVX1-NEXT:    shll $16, %ebx
; AVX1-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movzwl %ax, %r15d
; AVX1-NEXT:    orl %ebx, %r15d
; AVX1-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movw %ax, %bx
; AVX1-NEXT:    shll $16, %ebx
; AVX1-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movzwl %ax, %eax
; AVX1-NEXT:    orl %ebx, %eax
; AVX1-NEXT:    shlq $32, %rax
; AVX1-NEXT:    orq %r15, %rax
; AVX1-NEXT:    vmovq %rax, %xmm0
; AVX1-NEXT:    vmovq %r14, %xmm1
; AVX1-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    addq $64, %rsp
; AVX1-NEXT:    popq %rbx
; AVX1-NEXT:    popq %r14
; AVX1-NEXT:    popq %r15
; AVX1-NEXT:    retq
;
; AVX2-LABEL: cvt_8f64_to_8i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %r15
; AVX2-NEXT:    pushq %r14
; AVX2-NEXT:    pushq %rbx
; AVX2-NEXT:    subq $64, %rsp
; AVX2-NEXT:    vmovups %ymm1, (%rsp) # 32-byte Spill
; AVX2-NEXT:    vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movw %ax, %bx
; AVX2-NEXT:    shll $16, %ebx
; AVX2-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movzwl %ax, %r15d
; AVX2-NEXT:    orl %ebx, %r15d
; AVX2-NEXT:    vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movw %ax, %bx
; AVX2-NEXT:    shll $16, %ebx
; AVX2-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movzwl %ax, %r14d
; AVX2-NEXT:    orl %ebx, %r14d
; AVX2-NEXT:    shlq $32, %r14
; AVX2-NEXT:    orq %r15, %r14
; AVX2-NEXT:    vpermilpd $1, (%rsp), %xmm0 # 16-byte Folded Reload
; AVX2-NEXT:    # xmm0 = mem[1,0]
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movw %ax, %bx
; AVX2-NEXT:    shll $16, %ebx
; AVX2-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movzwl %ax, %r15d
; AVX2-NEXT:    orl %ebx, %r15d
; AVX2-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX2-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX2-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movw %ax, %bx
; AVX2-NEXT:    shll $16, %ebx
; AVX2-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX2-NEXT:    callq __truncdfhf2
; AVX2-NEXT:    movzwl %ax, %eax
; AVX2-NEXT:    orl %ebx, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    orq %r15, %rax
; AVX2-NEXT:    vmovq %rax, %xmm0
; AVX2-NEXT:    vmovq %r14, %xmm1
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX2-NEXT:    addq $64, %rsp
; AVX2-NEXT:    popq %rbx
; AVX2-NEXT:    popq %r14
; AVX2-NEXT:    popq %r15
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: cvt_8f64_to_8i16:
; AVX512F:       # BB#0:
; AVX512F-NEXT:    pushq %r15
; AVX512F-NEXT:    pushq %r14
; AVX512F-NEXT:    pushq %rbx
; AVX512F-NEXT:    subq $96, %rsp
; AVX512F-NEXT:    vmovupd %zmm0, (%rsp) # 64-byte Spill
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movw %ax, %bx
; AVX512F-NEXT:    shll $16, %ebx
; AVX512F-NEXT:    vmovups (%rsp), %zmm0 # 64-byte Reload
; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movzwl %ax, %r15d
; AVX512F-NEXT:    orl %ebx, %r15d
; AVX512F-NEXT:    vmovupd (%rsp), %zmm0 # 64-byte Reload
; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movw %ax, %bx
; AVX512F-NEXT:    shll $16, %ebx
; AVX512F-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movzwl %ax, %r14d
; AVX512F-NEXT:    orl %ebx, %r14d
; AVX512F-NEXT:    shlq $32, %r14
; AVX512F-NEXT:    orq %r15, %r14
; AVX512F-NEXT:    vmovupd (%rsp), %zmm0 # 64-byte Reload
; AVX512F-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; AVX512F-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movw %ax, %bx
; AVX512F-NEXT:    shll $16, %ebx
; AVX512F-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movzwl %ax, %r15d
; AVX512F-NEXT:    orl %ebx, %r15d
; AVX512F-NEXT:    vmovupd (%rsp), %ymm0 # 32-byte Reload
; AVX512F-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512F-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; AVX512F-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movw %ax, %bx
; AVX512F-NEXT:    shll $16, %ebx
; AVX512F-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512F-NEXT:    callq __truncdfhf2
; AVX512F-NEXT:    movzwl %ax, %eax
; AVX512F-NEXT:    orl %ebx, %eax
; AVX512F-NEXT:    shlq $32, %rax
; AVX512F-NEXT:    orq %r15, %rax
; AVX512F-NEXT:    vmovq %rax, %xmm0
; AVX512F-NEXT:    vmovq %r14, %xmm1
; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512F-NEXT:    addq $96, %rsp
; AVX512F-NEXT:    popq %rbx
; AVX512F-NEXT:    popq %r14
; AVX512F-NEXT:    popq %r15
; AVX512F-NEXT:    retq
;
; AVX512VL-LABEL: cvt_8f64_to_8i16:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    pushq %r15
; AVX512VL-NEXT:    pushq %r14
; AVX512VL-NEXT:    pushq %rbx
; AVX512VL-NEXT:    subq $96, %rsp
; AVX512VL-NEXT:    vmovupd %zmm0, (%rsp) # 64-byte Spill
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movw %ax, %bx
; AVX512VL-NEXT:    shll $16, %ebx
; AVX512VL-NEXT:    vmovups (%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill>
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movzwl %ax, %r15d
; AVX512VL-NEXT:    orl %ebx, %r15d
; AVX512VL-NEXT:    vmovups (%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT:    vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movw %ax, %bx
; AVX512VL-NEXT:    shll $16, %ebx
; AVX512VL-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movzwl %ax, %r14d
; AVX512VL-NEXT:    orl %ebx, %r14d
; AVX512VL-NEXT:    shlq $32, %r14
; AVX512VL-NEXT:    orq %r15, %r14
; AVX512VL-NEXT:    vmovupd (%rsp), %zmm0 # 64-byte Reload
; AVX512VL-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
; AVX512VL-NEXT:    vmovupd %ymm0, (%rsp) # 32-byte Spill
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movw %ax, %bx
; AVX512VL-NEXT:    shll $16, %ebx
; AVX512VL-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movzwl %ax, %r15d
; AVX512VL-NEXT:    orl %ebx, %r15d
; AVX512VL-NEXT:    vmovups (%rsp), %ymm0 # 32-byte Reload
; AVX512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX512VL-NEXT:    vmovaps %xmm0, (%rsp) # 16-byte Spill
; AVX512VL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movw %ax, %bx
; AVX512VL-NEXT:    shll $16, %ebx
; AVX512VL-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; AVX512VL-NEXT:    callq __truncdfhf2
; AVX512VL-NEXT:    movzwl %ax, %eax
; AVX512VL-NEXT:    orl %ebx, %eax
; AVX512VL-NEXT:    shlq $32, %rax
; AVX512VL-NEXT:    orq %r15, %rax
; AVX512VL-NEXT:    vmovq %rax, %xmm0
; AVX512VL-NEXT:    vmovq %r14, %xmm1
; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX512VL-NEXT:    addq $96, %rsp
; AVX512VL-NEXT:    popq %rbx
; AVX512VL-NEXT:    popq %r14
; AVX512VL-NEXT:    popq %r15
; AVX512VL-NEXT:    retq
  %1 = fptrunc <8 x double> %a0 to <8 x half>
  %2 = bitcast <8 x half> %1 to <8 x i16>
  ret <8 x i16> %2
}

;
; Double to Half (Store)
;

define void @store_cvt_f64_to_i16(double %a0, i16* %a1) nounwind {
; ALL-LABEL: store_cvt_f64_to_i16:
; ALL:       # BB#0:
; ALL-NEXT:    pushq %rbx
; ALL-NEXT:    movq %rdi, %rbx
; ALL-NEXT:    callq __truncdfhf2
; ALL-NEXT:    movw %ax, (%rbx)
; ALL-NEXT:    popq %rbx
; ALL-NEXT:    retq
  %1 = fptrunc double %a0 to half
  %2 = bitcast half %1 to i16
  store i16 %2, i16* %a1
  ret void
}

define void @store_cvt_2f64_to_2i16(<2 x double> %a0, <2 x i16>* %a1) nounwind {
; ALL-LABEL: store_cvt_2f64_to_2i16:
; ALL:       # BB#0:
; ALL-NEXT:    pushq %rbp
; ALL-NEXT:    pushq %rbx
; ALL-NEXT:    subq $24, %rsp
; ALL-NEXT:    movq %rdi, %rbx
; ALL-NEXT:    vmovapd %xmm0, (%rsp) # 16-byte Spill
; ALL-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; ALL-NEXT:    callq __truncdfhf2
; ALL-NEXT:    movl %eax, %ebp
; ALL-NEXT:    vmovaps (%rsp), %xmm0 # 16-byte Reload
; ALL-NEXT:    callq __truncdfhf2
; ALL-NEXT:    movw %ax, (%rbx)
; ALL-NEXT:    movw %bp, 2(%rbx)
; ALL-NEXT:    addq $24, %rsp
; ALL-NEXT:    popq %rbx
; ALL-NEXT:    popq %rbp
; ALL-NEXT:    retq
  %1 = fptrunc <2 x double> %a0 to <2 x half>
  %2 = bitcast <2 x half> %1 to <2 x i16>
  store <2 x i16> %2, <2 x i16>* %a1
  ret void
}

define void @store_cvt_4f64_to_4i16(<4 x double> %a0, <4 x i16>* %a1) nounwind {
; AVX1-LABEL: store_cvt_4f64_to_4i16:
; AVX1:       # BB#0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:    pushq %r15
; AVX1-NEXT:    pushq %r14
; AVX1-NEXT:    pushq %rbx
; AVX1-NEXT:    subq $88, %rsp
; AVX1-NEXT:    movq %rdi, %rbx
; AVX1-NEXT:    vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movl %eax, %r14d
; AVX1-NEXT:    vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movl %eax, %r15d
; AVX1-NEXT:    vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload
; AVX1-NEXT:    # kill: %XMM0<def> %XMM0<kill> %YMM0<kill>
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movl %eax, %ebp
; AVX1-NEXT:    vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload
; AVX1-NEXT:    callq __truncdfhf2
; AVX1-NEXT:    movw %ax, 4(%rbx)
; AVX1-NEXT:    movw %bp, (%rbx)
; AVX1-NEXT:    movw %r15w, 6(%rbx)
; AVX1-NEXT:    movw %r14w, 2(%rbx)
; AVX1-NEXT:    addq $88, %rsp
; AVX1-NEXT:    popq %rbx
; AVX1-NEXT:    popq %r14
; AVX1-NEXT:    popq %r15
; AVX1-NEXT:    popq %rbp
; AVX1-NEXT:    retq
;
; AVX2-LABEL: store_cvt_4f64_to_4i16:
; AVX2:       # BB#0:
; AVX2-NEXT:    pushq %rbp
; AVX2-NEXT:    pushq %r15
; AVX2-NEXT:    pushq %r14
; AVX2-NEXT:    pushq %rbx
; AVX2-NEXT:
subq $88, %rsp 5112; AVX2-NEXT: movq %rdi, %rbx 5113; AVX2-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill 5114; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5115; AVX2-NEXT: vzeroupper 5116; AVX2-NEXT: callq __truncdfhf2 5117; AVX2-NEXT: movl %eax, %r14d 5118; AVX2-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5119; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 5120; AVX2-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill 5121; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5122; AVX2-NEXT: vzeroupper 5123; AVX2-NEXT: callq __truncdfhf2 5124; AVX2-NEXT: movl %eax, %r15d 5125; AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5126; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5127; AVX2-NEXT: vzeroupper 5128; AVX2-NEXT: callq __truncdfhf2 5129; AVX2-NEXT: movl %eax, %ebp 5130; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload 5131; AVX2-NEXT: callq __truncdfhf2 5132; AVX2-NEXT: movw %ax, 4(%rbx) 5133; AVX2-NEXT: movw %bp, (%rbx) 5134; AVX2-NEXT: movw %r15w, 6(%rbx) 5135; AVX2-NEXT: movw %r14w, 2(%rbx) 5136; AVX2-NEXT: addq $88, %rsp 5137; AVX2-NEXT: popq %rbx 5138; AVX2-NEXT: popq %r14 5139; AVX2-NEXT: popq %r15 5140; AVX2-NEXT: popq %rbp 5141; AVX2-NEXT: retq 5142; 5143; AVX512F-LABEL: store_cvt_4f64_to_4i16: 5144; AVX512F: # BB#0: 5145; AVX512F-NEXT: pushq %rbp 5146; AVX512F-NEXT: pushq %r15 5147; AVX512F-NEXT: pushq %r14 5148; AVX512F-NEXT: pushq %rbx 5149; AVX512F-NEXT: subq $88, %rsp 5150; AVX512F-NEXT: movq %rdi, %rbx 5151; AVX512F-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill 5152; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5153; AVX512F-NEXT: callq __truncdfhf2 5154; AVX512F-NEXT: movl %eax, %r14d 5155; AVX512F-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5156; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 5157; AVX512F-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill 5158; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5159; AVX512F-NEXT: callq __truncdfhf2 5160; AVX512F-NEXT: movl 
%eax, %r15d 5161; AVX512F-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5162; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5163; AVX512F-NEXT: callq __truncdfhf2 5164; AVX512F-NEXT: movl %eax, %ebp 5165; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload 5166; AVX512F-NEXT: callq __truncdfhf2 5167; AVX512F-NEXT: movw %ax, 4(%rbx) 5168; AVX512F-NEXT: movw %bp, (%rbx) 5169; AVX512F-NEXT: movw %r15w, 6(%rbx) 5170; AVX512F-NEXT: movw %r14w, 2(%rbx) 5171; AVX512F-NEXT: addq $88, %rsp 5172; AVX512F-NEXT: popq %rbx 5173; AVX512F-NEXT: popq %r14 5174; AVX512F-NEXT: popq %r15 5175; AVX512F-NEXT: popq %rbp 5176; AVX512F-NEXT: retq 5177; 5178; AVX512VL-LABEL: store_cvt_4f64_to_4i16: 5179; AVX512VL: # BB#0: 5180; AVX512VL-NEXT: pushq %rbp 5181; AVX512VL-NEXT: pushq %r15 5182; AVX512VL-NEXT: pushq %r14 5183; AVX512VL-NEXT: pushq %rbx 5184; AVX512VL-NEXT: subq $88, %rsp 5185; AVX512VL-NEXT: movq %rdi, %rbx 5186; AVX512VL-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill 5187; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5188; AVX512VL-NEXT: callq __truncdfhf2 5189; AVX512VL-NEXT: movl %eax, %r14d 5190; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5191; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 5192; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill 5193; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5194; AVX512VL-NEXT: callq __truncdfhf2 5195; AVX512VL-NEXT: movl %eax, %r15d 5196; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5197; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5198; AVX512VL-NEXT: callq __truncdfhf2 5199; AVX512VL-NEXT: movl %eax, %ebp 5200; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload 5201; AVX512VL-NEXT: callq __truncdfhf2 5202; AVX512VL-NEXT: movw %ax, 4(%rbx) 5203; AVX512VL-NEXT: movw %bp, (%rbx) 5204; AVX512VL-NEXT: movw %r15w, 6(%rbx) 5205; AVX512VL-NEXT: movw %r14w, 2(%rbx) 5206; AVX512VL-NEXT: addq $88, %rsp 5207; 
AVX512VL-NEXT: popq %rbx 5208; AVX512VL-NEXT: popq %r14 5209; AVX512VL-NEXT: popq %r15 5210; AVX512VL-NEXT: popq %rbp 5211; AVX512VL-NEXT: retq 5212 %1 = fptrunc <4 x double> %a0 to <4 x half> 5213 %2 = bitcast <4 x half> %1 to <4 x i16> 5214 store <4 x i16> %2, <4 x i16>* %a1 5215 ret void 5216} 5217 5218define void @store_cvt_4f64_to_8i16_undef(<4 x double> %a0, <8 x i16>* %a1) nounwind { 5219; AVX1-LABEL: store_cvt_4f64_to_8i16_undef: 5220; AVX1: # BB#0: 5221; AVX1-NEXT: pushq %rbp 5222; AVX1-NEXT: pushq %r14 5223; AVX1-NEXT: pushq %rbx 5224; AVX1-NEXT: subq $32, %rsp 5225; AVX1-NEXT: movq %rdi, %r14 5226; AVX1-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 5227; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5228; AVX1-NEXT: vzeroupper 5229; AVX1-NEXT: callq __truncdfhf2 5230; AVX1-NEXT: movw %ax, %bp 5231; AVX1-NEXT: shll $16, %ebp 5232; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload 5233; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5234; AVX1-NEXT: vzeroupper 5235; AVX1-NEXT: callq __truncdfhf2 5236; AVX1-NEXT: movzwl %ax, %ebx 5237; AVX1-NEXT: orl %ebp, %ebx 5238; AVX1-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload 5239; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 5240; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill 5241; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5242; AVX1-NEXT: vzeroupper 5243; AVX1-NEXT: callq __truncdfhf2 5244; AVX1-NEXT: movw %ax, %bp 5245; AVX1-NEXT: shll $16, %ebp 5246; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 5247; AVX1-NEXT: callq __truncdfhf2 5248; AVX1-NEXT: movzwl %ax, %eax 5249; AVX1-NEXT: orl %ebp, %eax 5250; AVX1-NEXT: shlq $32, %rax 5251; AVX1-NEXT: orq %rbx, %rax 5252; AVX1-NEXT: vmovq %rax, %xmm0 5253; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 5254; AVX1-NEXT: vmovdqa %xmm0, (%r14) 5255; AVX1-NEXT: addq $32, %rsp 5256; AVX1-NEXT: popq %rbx 5257; AVX1-NEXT: popq %r14 5258; AVX1-NEXT: popq %rbp 5259; AVX1-NEXT: retq 5260; 5261; AVX2-LABEL: 
store_cvt_4f64_to_8i16_undef: 5262; AVX2: # BB#0: 5263; AVX2-NEXT: pushq %rbp 5264; AVX2-NEXT: pushq %r14 5265; AVX2-NEXT: pushq %rbx 5266; AVX2-NEXT: subq $32, %rsp 5267; AVX2-NEXT: movq %rdi, %r14 5268; AVX2-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 5269; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5270; AVX2-NEXT: vzeroupper 5271; AVX2-NEXT: callq __truncdfhf2 5272; AVX2-NEXT: movw %ax, %bp 5273; AVX2-NEXT: shll $16, %ebp 5274; AVX2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload 5275; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5276; AVX2-NEXT: vzeroupper 5277; AVX2-NEXT: callq __truncdfhf2 5278; AVX2-NEXT: movzwl %ax, %ebx 5279; AVX2-NEXT: orl %ebp, %ebx 5280; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload 5281; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 5282; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill 5283; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5284; AVX2-NEXT: vzeroupper 5285; AVX2-NEXT: callq __truncdfhf2 5286; AVX2-NEXT: movw %ax, %bp 5287; AVX2-NEXT: shll $16, %ebp 5288; AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 5289; AVX2-NEXT: callq __truncdfhf2 5290; AVX2-NEXT: movzwl %ax, %eax 5291; AVX2-NEXT: orl %ebp, %eax 5292; AVX2-NEXT: shlq $32, %rax 5293; AVX2-NEXT: orq %rbx, %rax 5294; AVX2-NEXT: vmovq %rax, %xmm0 5295; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 5296; AVX2-NEXT: vmovdqa %xmm0, (%r14) 5297; AVX2-NEXT: addq $32, %rsp 5298; AVX2-NEXT: popq %rbx 5299; AVX2-NEXT: popq %r14 5300; AVX2-NEXT: popq %rbp 5301; AVX2-NEXT: retq 5302; 5303; AVX512F-LABEL: store_cvt_4f64_to_8i16_undef: 5304; AVX512F: # BB#0: 5305; AVX512F-NEXT: pushq %rbp 5306; AVX512F-NEXT: pushq %r14 5307; AVX512F-NEXT: pushq %rbx 5308; AVX512F-NEXT: subq $32, %rsp 5309; AVX512F-NEXT: movq %rdi, %r14 5310; AVX512F-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 5311; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5312; AVX512F-NEXT: callq __truncdfhf2 5313; AVX512F-NEXT: movw %ax, %bp 5314; AVX512F-NEXT: shll $16, 
%ebp 5315; AVX512F-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload 5316; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5317; AVX512F-NEXT: callq __truncdfhf2 5318; AVX512F-NEXT: movzwl %ax, %ebx 5319; AVX512F-NEXT: orl %ebp, %ebx 5320; AVX512F-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload 5321; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 5322; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill 5323; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5324; AVX512F-NEXT: callq __truncdfhf2 5325; AVX512F-NEXT: movw %ax, %bp 5326; AVX512F-NEXT: shll $16, %ebp 5327; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 5328; AVX512F-NEXT: callq __truncdfhf2 5329; AVX512F-NEXT: movzwl %ax, %eax 5330; AVX512F-NEXT: orl %ebp, %eax 5331; AVX512F-NEXT: shlq $32, %rax 5332; AVX512F-NEXT: orq %rbx, %rax 5333; AVX512F-NEXT: vmovq %rax, %xmm0 5334; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13,8,9,12,13,12,13,14,15] 5335; AVX512F-NEXT: vmovdqa %xmm0, (%r14) 5336; AVX512F-NEXT: addq $32, %rsp 5337; AVX512F-NEXT: popq %rbx 5338; AVX512F-NEXT: popq %r14 5339; AVX512F-NEXT: popq %rbp 5340; AVX512F-NEXT: retq 5341; 5342; AVX512VL-LABEL: store_cvt_4f64_to_8i16_undef: 5343; AVX512VL: # BB#0: 5344; AVX512VL-NEXT: pushq %rbp 5345; AVX512VL-NEXT: pushq %r14 5346; AVX512VL-NEXT: pushq %rbx 5347; AVX512VL-NEXT: subq $32, %rsp 5348; AVX512VL-NEXT: movq %rdi, %r14 5349; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 5350; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5351; AVX512VL-NEXT: callq __truncdfhf2 5352; AVX512VL-NEXT: movw %ax, %bp 5353; AVX512VL-NEXT: shll $16, %ebp 5354; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload 5355; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5356; AVX512VL-NEXT: callq __truncdfhf2 5357; AVX512VL-NEXT: movzwl %ax, %ebx 5358; AVX512VL-NEXT: orl %ebp, %ebx 5359; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload 5360; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 5361; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) 
# 16-byte Spill 5362; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5363; AVX512VL-NEXT: callq __truncdfhf2 5364; AVX512VL-NEXT: movw %ax, %bp 5365; AVX512VL-NEXT: shll $16, %ebp 5366; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 5367; AVX512VL-NEXT: callq __truncdfhf2 5368; AVX512VL-NEXT: movzwl %ax, %eax 5369; AVX512VL-NEXT: orl %ebp, %eax 5370; AVX512VL-NEXT: shlq $32, %rax 5371; AVX512VL-NEXT: orq %rbx, %rax 5372; AVX512VL-NEXT: vmovq %rax, %xmm0 5373; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 5374; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] 5375; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,2,2,3] 5376; AVX512VL-NEXT: vmovdqa %xmm0, (%r14) 5377; AVX512VL-NEXT: addq $32, %rsp 5378; AVX512VL-NEXT: popq %rbx 5379; AVX512VL-NEXT: popq %r14 5380; AVX512VL-NEXT: popq %rbp 5381; AVX512VL-NEXT: retq 5382 %1 = fptrunc <4 x double> %a0 to <4 x half> 5383 %2 = bitcast <4 x half> %1 to <4 x i16> 5384 %3 = shufflevector <4 x i16> %2, <4 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 5385 store <8 x i16> %3, <8 x i16>* %a1 5386 ret void 5387} 5388 5389define void @store_cvt_4f64_to_8i16_zero(<4 x double> %a0, <8 x i16>* %a1) nounwind { 5390; AVX1-LABEL: store_cvt_4f64_to_8i16_zero: 5391; AVX1: # BB#0: 5392; AVX1-NEXT: pushq %rbp 5393; AVX1-NEXT: pushq %r14 5394; AVX1-NEXT: pushq %rbx 5395; AVX1-NEXT: subq $32, %rsp 5396; AVX1-NEXT: movq %rdi, %r14 5397; AVX1-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 5398; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5399; AVX1-NEXT: vzeroupper 5400; AVX1-NEXT: callq __truncdfhf2 5401; AVX1-NEXT: movw %ax, %bp 5402; AVX1-NEXT: shll $16, %ebp 5403; AVX1-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload 5404; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5405; AVX1-NEXT: vzeroupper 5406; AVX1-NEXT: callq __truncdfhf2 5407; AVX1-NEXT: movzwl %ax, %ebx 5408; AVX1-NEXT: orl %ebp, %ebx 5409; AVX1-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload 5410; 
AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 5411; AVX1-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill 5412; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5413; AVX1-NEXT: vzeroupper 5414; AVX1-NEXT: callq __truncdfhf2 5415; AVX1-NEXT: movw %ax, %bp 5416; AVX1-NEXT: shll $16, %ebp 5417; AVX1-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 5418; AVX1-NEXT: callq __truncdfhf2 5419; AVX1-NEXT: movzwl %ax, %eax 5420; AVX1-NEXT: orl %ebp, %eax 5421; AVX1-NEXT: shlq $32, %rax 5422; AVX1-NEXT: orq %rbx, %rax 5423; AVX1-NEXT: vmovq %rax, %xmm0 5424; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero 5425; AVX1-NEXT: vmovdqa %xmm0, (%r14) 5426; AVX1-NEXT: addq $32, %rsp 5427; AVX1-NEXT: popq %rbx 5428; AVX1-NEXT: popq %r14 5429; AVX1-NEXT: popq %rbp 5430; AVX1-NEXT: retq 5431; 5432; AVX2-LABEL: store_cvt_4f64_to_8i16_zero: 5433; AVX2: # BB#0: 5434; AVX2-NEXT: pushq %rbp 5435; AVX2-NEXT: pushq %r14 5436; AVX2-NEXT: pushq %rbx 5437; AVX2-NEXT: subq $32, %rsp 5438; AVX2-NEXT: movq %rdi, %r14 5439; AVX2-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 5440; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5441; AVX2-NEXT: vzeroupper 5442; AVX2-NEXT: callq __truncdfhf2 5443; AVX2-NEXT: movw %ax, %bp 5444; AVX2-NEXT: shll $16, %ebp 5445; AVX2-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload 5446; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5447; AVX2-NEXT: vzeroupper 5448; AVX2-NEXT: callq __truncdfhf2 5449; AVX2-NEXT: movzwl %ax, %ebx 5450; AVX2-NEXT: orl %ebp, %ebx 5451; AVX2-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload 5452; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 5453; AVX2-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill 5454; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5455; AVX2-NEXT: vzeroupper 5456; AVX2-NEXT: callq __truncdfhf2 5457; AVX2-NEXT: movw %ax, %bp 5458; AVX2-NEXT: shll $16, %ebp 5459; AVX2-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 5460; AVX2-NEXT: callq __truncdfhf2 5461; AVX2-NEXT: movzwl %ax, %eax 5462; AVX2-NEXT: orl 
%ebp, %eax 5463; AVX2-NEXT: shlq $32, %rax 5464; AVX2-NEXT: orq %rbx, %rax 5465; AVX2-NEXT: vmovq %rax, %xmm0 5466; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero 5467; AVX2-NEXT: vmovdqa %xmm0, (%r14) 5468; AVX2-NEXT: addq $32, %rsp 5469; AVX2-NEXT: popq %rbx 5470; AVX2-NEXT: popq %r14 5471; AVX2-NEXT: popq %rbp 5472; AVX2-NEXT: retq 5473; 5474; AVX512F-LABEL: store_cvt_4f64_to_8i16_zero: 5475; AVX512F: # BB#0: 5476; AVX512F-NEXT: pushq %rbp 5477; AVX512F-NEXT: pushq %r14 5478; AVX512F-NEXT: pushq %rbx 5479; AVX512F-NEXT: subq $32, %rsp 5480; AVX512F-NEXT: movq %rdi, %r14 5481; AVX512F-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 5482; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5483; AVX512F-NEXT: callq __truncdfhf2 5484; AVX512F-NEXT: movw %ax, %bp 5485; AVX512F-NEXT: shll $16, %ebp 5486; AVX512F-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload 5487; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5488; AVX512F-NEXT: callq __truncdfhf2 5489; AVX512F-NEXT: movzwl %ax, %ebx 5490; AVX512F-NEXT: orl %ebp, %ebx 5491; AVX512F-NEXT: vmovupd (%rsp), %ymm0 # 32-byte Reload 5492; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 5493; AVX512F-NEXT: vmovapd %xmm0, (%rsp) # 16-byte Spill 5494; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5495; AVX512F-NEXT: callq __truncdfhf2 5496; AVX512F-NEXT: movw %ax, %bp 5497; AVX512F-NEXT: shll $16, %ebp 5498; AVX512F-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 5499; AVX512F-NEXT: callq __truncdfhf2 5500; AVX512F-NEXT: movzwl %ax, %eax 5501; AVX512F-NEXT: orl %ebp, %eax 5502; AVX512F-NEXT: shlq $32, %rax 5503; AVX512F-NEXT: orq %rbx, %rax 5504; AVX512F-NEXT: vmovq %rax, %xmm0 5505; AVX512F-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,1,4,5,8,9,12,13],zero,zero,zero,zero,zero,zero,zero,zero 5506; AVX512F-NEXT: vmovdqa %xmm0, (%r14) 5507; AVX512F-NEXT: addq $32, %rsp 5508; AVX512F-NEXT: popq %rbx 5509; AVX512F-NEXT: popq %r14 5510; AVX512F-NEXT: popq %rbp 5511; AVX512F-NEXT: retq 
5512; 5513; AVX512VL-LABEL: store_cvt_4f64_to_8i16_zero: 5514; AVX512VL: # BB#0: 5515; AVX512VL-NEXT: pushq %rbp 5516; AVX512VL-NEXT: pushq %r14 5517; AVX512VL-NEXT: pushq %rbx 5518; AVX512VL-NEXT: subq $32, %rsp 5519; AVX512VL-NEXT: movq %rdi, %r14 5520; AVX512VL-NEXT: vmovupd %ymm0, (%rsp) # 32-byte Spill 5521; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5522; AVX512VL-NEXT: callq __truncdfhf2 5523; AVX512VL-NEXT: movw %ax, %bp 5524; AVX512VL-NEXT: shll $16, %ebp 5525; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload 5526; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5527; AVX512VL-NEXT: callq __truncdfhf2 5528; AVX512VL-NEXT: movzwl %ax, %ebx 5529; AVX512VL-NEXT: orl %ebp, %ebx 5530; AVX512VL-NEXT: vmovups (%rsp), %ymm0 # 32-byte Reload 5531; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 5532; AVX512VL-NEXT: vmovaps %xmm0, (%rsp) # 16-byte Spill 5533; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5534; AVX512VL-NEXT: callq __truncdfhf2 5535; AVX512VL-NEXT: movw %ax, %bp 5536; AVX512VL-NEXT: shll $16, %ebp 5537; AVX512VL-NEXT: vmovaps (%rsp), %xmm0 # 16-byte Reload 5538; AVX512VL-NEXT: callq __truncdfhf2 5539; AVX512VL-NEXT: movzwl %ax, %eax 5540; AVX512VL-NEXT: orl %ebp, %eax 5541; AVX512VL-NEXT: shlq $32, %rax 5542; AVX512VL-NEXT: orq %rbx, %rax 5543; AVX512VL-NEXT: vmovq %rax, %xmm0 5544; AVX512VL-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,2,2,3,4,5,6,7] 5545; AVX512VL-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,4,6,6,7] 5546; AVX512VL-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,1,0,2] 5547; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1 5548; AVX512VL-NEXT: vpunpckhqdq {{.*#+}} xmm0 = xmm0[1],xmm1[1] 5549; AVX512VL-NEXT: vmovdqa %xmm0, (%r14) 5550; AVX512VL-NEXT: addq $32, %rsp 5551; AVX512VL-NEXT: popq %rbx 5552; AVX512VL-NEXT: popq %r14 5553; AVX512VL-NEXT: popq %rbp 5554; AVX512VL-NEXT: retq 5555 %1 = fptrunc <4 x double> %a0 to <4 x half> 5556 %2 = bitcast <4 x half> %1 to <4 x i16> 5557 %3 = shufflevector <4 x i16> %2, <4 x i16> 
zeroinitializer, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 5558 store <8 x i16> %3, <8 x i16>* %a1 5559 ret void 5560} 5561 5562define void @store_cvt_8f64_to_8i16(<8 x double> %a0, <8 x i16>* %a1) nounwind { 5563; AVX1-LABEL: store_cvt_8f64_to_8i16: 5564; AVX1: # BB#0: 5565; AVX1-NEXT: pushq %rbp 5566; AVX1-NEXT: pushq %r15 5567; AVX1-NEXT: pushq %r14 5568; AVX1-NEXT: pushq %r13 5569; AVX1-NEXT: pushq %r12 5570; AVX1-NEXT: pushq %rbx 5571; AVX1-NEXT: subq $136, %rsp 5572; AVX1-NEXT: movq %rdi, %rbx 5573; AVX1-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill 5574; AVX1-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill 5575; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5576; AVX1-NEXT: vzeroupper 5577; AVX1-NEXT: callq __truncdfhf2 5578; AVX1-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill 5579; AVX1-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5580; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 5581; AVX1-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill 5582; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5583; AVX1-NEXT: vzeroupper 5584; AVX1-NEXT: callq __truncdfhf2 5585; AVX1-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill 5586; AVX1-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload 5587; AVX1-NEXT: # xmm0 = mem[1,0] 5588; AVX1-NEXT: callq __truncdfhf2 5589; AVX1-NEXT: movl %eax, %r12d 5590; AVX1-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5591; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 5592; AVX1-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill 5593; AVX1-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5594; AVX1-NEXT: vzeroupper 5595; AVX1-NEXT: callq __truncdfhf2 5596; AVX1-NEXT: movl %eax, %r13d 5597; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5598; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5599; AVX1-NEXT: vzeroupper 5600; AVX1-NEXT: callq __truncdfhf2 5601; AVX1-NEXT: movl %eax, %ebp 5602; AVX1-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload 5603; 
AVX1-NEXT: callq __truncdfhf2 5604; AVX1-NEXT: movl %eax, %r14d 5605; AVX1-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5606; AVX1-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5607; AVX1-NEXT: vzeroupper 5608; AVX1-NEXT: callq __truncdfhf2 5609; AVX1-NEXT: movl %eax, %r15d 5610; AVX1-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload 5611; AVX1-NEXT: callq __truncdfhf2 5612; AVX1-NEXT: movw %ax, 12(%rbx) 5613; AVX1-NEXT: movw %r15w, 8(%rbx) 5614; AVX1-NEXT: movw %r14w, 4(%rbx) 5615; AVX1-NEXT: movw %bp, (%rbx) 5616; AVX1-NEXT: movw %r13w, 14(%rbx) 5617; AVX1-NEXT: movw %r12w, 10(%rbx) 5618; AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload 5619; AVX1-NEXT: movw %ax, 6(%rbx) 5620; AVX1-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload 5621; AVX1-NEXT: movw %ax, 2(%rbx) 5622; AVX1-NEXT: addq $136, %rsp 5623; AVX1-NEXT: popq %rbx 5624; AVX1-NEXT: popq %r12 5625; AVX1-NEXT: popq %r13 5626; AVX1-NEXT: popq %r14 5627; AVX1-NEXT: popq %r15 5628; AVX1-NEXT: popq %rbp 5629; AVX1-NEXT: retq 5630; 5631; AVX2-LABEL: store_cvt_8f64_to_8i16: 5632; AVX2: # BB#0: 5633; AVX2-NEXT: pushq %rbp 5634; AVX2-NEXT: pushq %r15 5635; AVX2-NEXT: pushq %r14 5636; AVX2-NEXT: pushq %r13 5637; AVX2-NEXT: pushq %r12 5638; AVX2-NEXT: pushq %rbx 5639; AVX2-NEXT: subq $136, %rsp 5640; AVX2-NEXT: movq %rdi, %rbx 5641; AVX2-NEXT: vmovups %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill 5642; AVX2-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill 5643; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5644; AVX2-NEXT: vzeroupper 5645; AVX2-NEXT: callq __truncdfhf2 5646; AVX2-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill 5647; AVX2-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5648; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 5649; AVX2-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill 5650; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5651; AVX2-NEXT: vzeroupper 5652; AVX2-NEXT: callq __truncdfhf2 5653; AVX2-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill 
5654; AVX2-NEXT: vpermilpd $1, {{[0-9]+}}(%rsp), %xmm0 # 16-byte Folded Reload 5655; AVX2-NEXT: # xmm0 = mem[1,0] 5656; AVX2-NEXT: callq __truncdfhf2 5657; AVX2-NEXT: movl %eax, %r12d 5658; AVX2-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5659; AVX2-NEXT: vextractf128 $1, %ymm0, %xmm0 5660; AVX2-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill 5661; AVX2-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5662; AVX2-NEXT: vzeroupper 5663; AVX2-NEXT: callq __truncdfhf2 5664; AVX2-NEXT: movl %eax, %r13d 5665; AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5666; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5667; AVX2-NEXT: vzeroupper 5668; AVX2-NEXT: callq __truncdfhf2 5669; AVX2-NEXT: movl %eax, %ebp 5670; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload 5671; AVX2-NEXT: callq __truncdfhf2 5672; AVX2-NEXT: movl %eax, %r14d 5673; AVX2-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5674; AVX2-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5675; AVX2-NEXT: vzeroupper 5676; AVX2-NEXT: callq __truncdfhf2 5677; AVX2-NEXT: movl %eax, %r15d 5678; AVX2-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload 5679; AVX2-NEXT: callq __truncdfhf2 5680; AVX2-NEXT: movw %ax, 12(%rbx) 5681; AVX2-NEXT: movw %r15w, 8(%rbx) 5682; AVX2-NEXT: movw %r14w, 4(%rbx) 5683; AVX2-NEXT: movw %bp, (%rbx) 5684; AVX2-NEXT: movw %r13w, 14(%rbx) 5685; AVX2-NEXT: movw %r12w, 10(%rbx) 5686; AVX2-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload 5687; AVX2-NEXT: movw %ax, 6(%rbx) 5688; AVX2-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload 5689; AVX2-NEXT: movw %ax, 2(%rbx) 5690; AVX2-NEXT: addq $136, %rsp 5691; AVX2-NEXT: popq %rbx 5692; AVX2-NEXT: popq %r12 5693; AVX2-NEXT: popq %r13 5694; AVX2-NEXT: popq %r14 5695; AVX2-NEXT: popq %r15 5696; AVX2-NEXT: popq %rbp 5697; AVX2-NEXT: retq 5698; 5699; AVX512F-LABEL: store_cvt_8f64_to_8i16: 5700; AVX512F: # BB#0: 5701; AVX512F-NEXT: pushq %rbp 5702; AVX512F-NEXT: pushq %r15 5703; 
AVX512F-NEXT: pushq %r14 5704; AVX512F-NEXT: pushq %r13 5705; AVX512F-NEXT: pushq %r12 5706; AVX512F-NEXT: pushq %rbx 5707; AVX512F-NEXT: subq $200, %rsp 5708; AVX512F-NEXT: movq %rdi, %rbx 5709; AVX512F-NEXT: vmovupd %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill 5710; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5711; AVX512F-NEXT: callq __truncdfhf2 5712; AVX512F-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill 5713; AVX512F-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload 5714; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 5715; AVX512F-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill 5716; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5717; AVX512F-NEXT: callq __truncdfhf2 5718; AVX512F-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill 5719; AVX512F-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload 5720; AVX512F-NEXT: vextractf64x4 $1, %zmm0, %ymm0 5721; AVX512F-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill 5722; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5723; AVX512F-NEXT: callq __truncdfhf2 5724; AVX512F-NEXT: movl %eax, %r12d 5725; AVX512F-NEXT: vmovupd {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5726; AVX512F-NEXT: vextractf128 $1, %ymm0, %xmm0 5727; AVX512F-NEXT: vmovapd %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill 5728; AVX512F-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5729; AVX512F-NEXT: callq __truncdfhf2 5730; AVX512F-NEXT: movl %eax, %r13d 5731; AVX512F-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload 5732; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> 5733; AVX512F-NEXT: callq __truncdfhf2 5734; AVX512F-NEXT: movl %eax, %ebp 5735; AVX512F-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload 5736; AVX512F-NEXT: callq __truncdfhf2 5737; AVX512F-NEXT: movl %eax, %r14d 5738; AVX512F-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5739; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5740; AVX512F-NEXT: callq __truncdfhf2 5741; AVX512F-NEXT: movl %eax, %r15d 5742; AVX512F-NEXT: vmovaps 
{{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload 5743; AVX512F-NEXT: callq __truncdfhf2 5744; AVX512F-NEXT: movw %ax, 12(%rbx) 5745; AVX512F-NEXT: movw %r15w, 8(%rbx) 5746; AVX512F-NEXT: movw %r14w, 4(%rbx) 5747; AVX512F-NEXT: movw %bp, (%rbx) 5748; AVX512F-NEXT: movw %r13w, 14(%rbx) 5749; AVX512F-NEXT: movw %r12w, 10(%rbx) 5750; AVX512F-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload 5751; AVX512F-NEXT: movw %ax, 6(%rbx) 5752; AVX512F-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload 5753; AVX512F-NEXT: movw %ax, 2(%rbx) 5754; AVX512F-NEXT: addq $200, %rsp 5755; AVX512F-NEXT: popq %rbx 5756; AVX512F-NEXT: popq %r12 5757; AVX512F-NEXT: popq %r13 5758; AVX512F-NEXT: popq %r14 5759; AVX512F-NEXT: popq %r15 5760; AVX512F-NEXT: popq %rbp 5761; AVX512F-NEXT: retq 5762; 5763; AVX512VL-LABEL: store_cvt_8f64_to_8i16: 5764; AVX512VL: # BB#0: 5765; AVX512VL-NEXT: pushq %rbp 5766; AVX512VL-NEXT: pushq %r15 5767; AVX512VL-NEXT: pushq %r14 5768; AVX512VL-NEXT: pushq %r13 5769; AVX512VL-NEXT: pushq %r12 5770; AVX512VL-NEXT: pushq %rbx 5771; AVX512VL-NEXT: subq $200, %rsp 5772; AVX512VL-NEXT: movq %rdi, %rbx 5773; AVX512VL-NEXT: vmovupd %zmm0, {{[0-9]+}}(%rsp) # 64-byte Spill 5774; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5775; AVX512VL-NEXT: callq __truncdfhf2 5776; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill 5777; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload 5778; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 5779; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill 5780; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5781; AVX512VL-NEXT: callq __truncdfhf2 5782; AVX512VL-NEXT: movw %ax, {{[0-9]+}}(%rsp) # 2-byte Spill 5783; AVX512VL-NEXT: vmovupd {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload 5784; AVX512VL-NEXT: vextractf64x4 $1, %zmm0, %ymm0 5785; AVX512VL-NEXT: vmovupd %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill 5786; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5787; AVX512VL-NEXT: callq __truncdfhf2 5788; 
AVX512VL-NEXT: movl %eax, %r12d 5789; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5790; AVX512VL-NEXT: vextractf128 $1, %ymm0, %xmm0 5791; AVX512VL-NEXT: vmovaps %xmm0, {{[0-9]+}}(%rsp) # 16-byte Spill 5792; AVX512VL-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0] 5793; AVX512VL-NEXT: callq __truncdfhf2 5794; AVX512VL-NEXT: movl %eax, %r13d 5795; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %zmm0 # 64-byte Reload 5796; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> 5797; AVX512VL-NEXT: callq __truncdfhf2 5798; AVX512VL-NEXT: movl %eax, %ebp 5799; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload 5800; AVX512VL-NEXT: callq __truncdfhf2 5801; AVX512VL-NEXT: movl %eax, %r14d 5802; AVX512VL-NEXT: vmovups {{[0-9]+}}(%rsp), %ymm0 # 32-byte Reload 5803; AVX512VL-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 5804; AVX512VL-NEXT: callq __truncdfhf2 5805; AVX512VL-NEXT: movl %eax, %r15d 5806; AVX512VL-NEXT: vmovaps {{[0-9]+}}(%rsp), %xmm0 # 16-byte Reload 5807; AVX512VL-NEXT: callq __truncdfhf2 5808; AVX512VL-NEXT: movw %ax, 12(%rbx) 5809; AVX512VL-NEXT: movw %r15w, 8(%rbx) 5810; AVX512VL-NEXT: movw %r14w, 4(%rbx) 5811; AVX512VL-NEXT: movw %bp, (%rbx) 5812; AVX512VL-NEXT: movw %r13w, 14(%rbx) 5813; AVX512VL-NEXT: movw %r12w, 10(%rbx) 5814; AVX512VL-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload 5815; AVX512VL-NEXT: movw %ax, 6(%rbx) 5816; AVX512VL-NEXT: movzwl {{[0-9]+}}(%rsp), %eax # 2-byte Folded Reload 5817; AVX512VL-NEXT: movw %ax, 2(%rbx) 5818; AVX512VL-NEXT: addq $200, %rsp 5819; AVX512VL-NEXT: popq %rbx 5820; AVX512VL-NEXT: popq %r12 5821; AVX512VL-NEXT: popq %r13 5822; AVX512VL-NEXT: popq %r14 5823; AVX512VL-NEXT: popq %r15 5824; AVX512VL-NEXT: popq %rbp 5825; AVX512VL-NEXT: retq 5826 %1 = fptrunc <8 x double> %a0 to <8 x half> 5827 %2 = bitcast <8 x half> %1 to <8 x i16> 5828 store <8 x i16> %2, <8 x i16>* %a1 5829 ret void 5830} 5831