1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefixes=SSE,SSSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=SSE,SSE41 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefixes=AVX,AVX512,AVX512F 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512,AVX512BW 9; 10; Just two 32-bit runs to make sure we do reasonable things there. 11; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE2 12; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefixes=X86-SSE,X86-SSE41 13 14define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp { 15; SSE2-LABEL: sext_16i8_to_8i16: 16; SSE2: # %bb.0: # %entry 17; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 18; SSE2-NEXT: psraw $8, %xmm0 19; SSE2-NEXT: retq 20; 21; SSSE3-LABEL: sext_16i8_to_8i16: 22; SSSE3: # %bb.0: # %entry 23; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 24; SSSE3-NEXT: psraw $8, %xmm0 25; SSSE3-NEXT: retq 26; 27; SSE41-LABEL: sext_16i8_to_8i16: 28; SSE41: # %bb.0: # %entry 29; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 30; SSE41-NEXT: retq 31; 32; AVX-LABEL: sext_16i8_to_8i16: 33; AVX: # %bb.0: # %entry 34; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 35; AVX-NEXT: retq 36; 37; X86-SSE2-LABEL: sext_16i8_to_8i16: 38; X86-SSE2: # %bb.0: # %entry 39; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 40; X86-SSE2-NEXT: psraw $8, %xmm0 41; X86-SSE2-NEXT: retl 42; 43; X86-SSE41-LABEL: sext_16i8_to_8i16: 44; X86-SSE41: # %bb.0: # %entry 45; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 46; X86-SSE41-NEXT: retl 47entry: 48 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 49 %C = sext <8 x i8> %B to <8 x i16> 50 ret <8 x i16> %C 51} 52 53define <16 x i16> @sext_16i8_to_16i16(<16 x i8> %A) nounwind uwtable readnone ssp { 54; SSE2-LABEL: sext_16i8_to_16i16: 55; SSE2: # %bb.0: # %entry 56; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 57; SSE2-NEXT: psraw $8, %xmm2 58; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 59; SSE2-NEXT: psraw $8, %xmm1 60; SSE2-NEXT: movdqa %xmm2, %xmm0 61; SSE2-NEXT: retq 62; 63; SSSE3-LABEL: sext_16i8_to_16i16: 64; SSSE3: # %bb.0: # %entry 65; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 66; SSSE3-NEXT: psraw $8, %xmm2 67; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 68; SSSE3-NEXT: psraw $8, %xmm1 69; SSSE3-NEXT: movdqa %xmm2, %xmm0 70; SSSE3-NEXT: retq 71; 72; SSE41-LABEL: sext_16i8_to_16i16: 73; SSE41: # %bb.0: # %entry 74; SSE41-NEXT: pmovsxbw %xmm0, %xmm2 75; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 76; SSE41-NEXT: pmovsxbw %xmm0, %xmm1 77; SSE41-NEXT: movdqa %xmm2, %xmm0 78; SSE41-NEXT: retq 79; 80; AVX1-LABEL: sext_16i8_to_16i16: 81; AVX1: # %bb.0: # %entry 82; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1 83; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = 
xmm0[2,3,2,3] 84; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0 85; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 86; AVX1-NEXT: retq 87; 88; AVX2-LABEL: sext_16i8_to_16i16: 89; AVX2: # %bb.0: # %entry 90; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0 91; AVX2-NEXT: retq 92; 93; AVX512-LABEL: sext_16i8_to_16i16: 94; AVX512: # %bb.0: # %entry 95; AVX512-NEXT: vpmovsxbw %xmm0, %ymm0 96; AVX512-NEXT: retq 97; 98; X86-SSE2-LABEL: sext_16i8_to_16i16: 99; X86-SSE2: # %bb.0: # %entry 100; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 101; X86-SSE2-NEXT: psraw $8, %xmm2 102; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 103; X86-SSE2-NEXT: psraw $8, %xmm1 104; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 105; X86-SSE2-NEXT: retl 106; 107; X86-SSE41-LABEL: sext_16i8_to_16i16: 108; X86-SSE41: # %bb.0: # %entry 109; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm2 110; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 111; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm1 112; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 113; X86-SSE41-NEXT: retl 114entry: 115 %B = sext <16 x i8> %A to <16 x i16> 116 ret <16 x i16> %B 117} 118 119define <32 x i16> @sext_32i8_to_32i16(<32 x i8> %A) nounwind uwtable readnone ssp { 120; SSE2-LABEL: sext_32i8_to_32i16: 121; SSE2: # %bb.0: # %entry 122; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] 123; SSE2-NEXT: psraw $8, %xmm4 124; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] 125; SSE2-NEXT: psraw $8, %xmm5 126; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 127; SSE2-NEXT: psraw $8, %xmm2 128; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] 129; SSE2-NEXT: psraw $8, %xmm3 130; SSE2-NEXT: movdqa %xmm4, %xmm0 131; SSE2-NEXT: movdqa %xmm5, %xmm1 132; SSE2-NEXT: retq 133; 134; SSSE3-LABEL: sext_32i8_to_32i16: 135; SSSE3: # %bb.0: # %entry 136; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] 137; SSSE3-NEXT: psraw $8, %xmm4 138; SSSE3-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] 139; SSSE3-NEXT: psraw $8, %xmm5 140; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 141; SSSE3-NEXT: psraw $8, %xmm2 142; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] 143; SSSE3-NEXT: psraw $8, %xmm3 144; SSSE3-NEXT: movdqa %xmm4, %xmm0 145; SSSE3-NEXT: movdqa %xmm5, %xmm1 146; SSSE3-NEXT: retq 147; 148; SSE41-LABEL: sext_32i8_to_32i16: 149; SSE41: # %bb.0: # %entry 150; SSE41-NEXT: pmovsxbw %xmm0, %xmm5 151; SSE41-NEXT: pmovsxbw %xmm1, %xmm2 152; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 153; SSE41-NEXT: pmovsxbw %xmm0, %xmm4 154; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 155; SSE41-NEXT: pmovsxbw %xmm0, %xmm3 156; SSE41-NEXT: movdqa %xmm5, %xmm0 157; SSE41-NEXT: movdqa %xmm4, %xmm1 158; SSE41-NEXT: retq 159; 160; AVX1-LABEL: sext_32i8_to_32i16: 161; AVX1: # %bb.0: # %entry 162; AVX1-NEXT: 
vpmovsxbw %xmm0, %xmm1 163; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 164; AVX1-NEXT: vpmovsxbw %xmm2, %xmm2 165; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 166; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 167; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1 168; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 169; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0 170; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 171; AVX1-NEXT: vmovaps %ymm2, %ymm0 172; AVX1-NEXT: retq 173; 174; AVX2-LABEL: sext_32i8_to_32i16: 175; AVX2: # %bb.0: # %entry 176; AVX2-NEXT: vpmovsxbw %xmm0, %ymm2 177; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 178; AVX2-NEXT: vpmovsxbw %xmm0, %ymm1 179; AVX2-NEXT: vmovdqa %ymm2, %ymm0 180; AVX2-NEXT: retq 181; 182; AVX512F-LABEL: sext_32i8_to_32i16: 183; AVX512F: # %bb.0: # %entry 184; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm1 185; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0 186; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm0 187; AVX512F-NEXT: vinserti64x4 $1, %ymm0, %zmm1, %zmm0 188; AVX512F-NEXT: retq 189; 190; AVX512BW-LABEL: sext_32i8_to_32i16: 191; AVX512BW: # %bb.0: # %entry 192; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0 193; AVX512BW-NEXT: retq 194; 195; X86-SSE2-LABEL: sext_32i8_to_32i16: 196; X86-SSE2: # %bb.0: # %entry 197; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7] 198; X86-SSE2-NEXT: psraw $8, %xmm4 199; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15] 200; X86-SSE2-NEXT: psraw $8, %xmm5 201; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 202; X86-SSE2-NEXT: psraw $8, %xmm2 203; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = 
xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15] 204; X86-SSE2-NEXT: psraw $8, %xmm3 205; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 206; X86-SSE2-NEXT: movdqa %xmm5, %xmm1 207; X86-SSE2-NEXT: retl 208; 209; X86-SSE41-LABEL: sext_32i8_to_32i16: 210; X86-SSE41: # %bb.0: # %entry 211; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm5 212; X86-SSE41-NEXT: pmovsxbw %xmm1, %xmm2 213; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 214; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm4 215; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 216; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm3 217; X86-SSE41-NEXT: movdqa %xmm5, %xmm0 218; X86-SSE41-NEXT: movdqa %xmm4, %xmm1 219; X86-SSE41-NEXT: retl 220entry: 221 %B = sext <32 x i8> %A to <32 x i16> 222 ret <32 x i16> %B 223} 224 225define <4 x i32> @sext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp { 226; SSE2-LABEL: sext_16i8_to_4i32: 227; SSE2: # %bb.0: # %entry 228; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 229; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 230; SSE2-NEXT: psrad $24, %xmm0 231; SSE2-NEXT: retq 232; 233; SSSE3-LABEL: sext_16i8_to_4i32: 234; SSSE3: # %bb.0: # %entry 235; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 236; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 237; SSSE3-NEXT: psrad $24, %xmm0 238; SSSE3-NEXT: retq 239; 240; SSE41-LABEL: sext_16i8_to_4i32: 241; SSE41: # %bb.0: # %entry 242; SSE41-NEXT: pmovsxbd %xmm0, %xmm0 243; SSE41-NEXT: retq 244; 245; AVX-LABEL: sext_16i8_to_4i32: 246; AVX: # %bb.0: # %entry 247; AVX-NEXT: vpmovsxbd %xmm0, %xmm0 248; AVX-NEXT: retq 249; 250; X86-SSE2-LABEL: sext_16i8_to_4i32: 251; X86-SSE2: # %bb.0: # %entry 252; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 253; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 254; X86-SSE2-NEXT: psrad $24, %xmm0 255; 
X86-SSE2-NEXT: retl 256; 257; X86-SSE41-LABEL: sext_16i8_to_4i32: 258; X86-SSE41: # %bb.0: # %entry 259; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm0 260; X86-SSE41-NEXT: retl 261entry: 262 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 263 %C = sext <4 x i8> %B to <4 x i32> 264 ret <4 x i32> %C 265} 266 267define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp { 268; SSE2-LABEL: sext_16i8_to_8i32: 269; SSE2: # %bb.0: # %entry 270; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 271; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 272; SSE2-NEXT: psrad $24, %xmm0 273; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 274; SSE2-NEXT: psrad $24, %xmm1 275; SSE2-NEXT: retq 276; 277; SSSE3-LABEL: sext_16i8_to_8i32: 278; SSSE3: # %bb.0: # %entry 279; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 280; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 281; SSSE3-NEXT: psrad $24, %xmm0 282; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 283; SSSE3-NEXT: psrad $24, %xmm1 284; SSSE3-NEXT: retq 285; 286; SSE41-LABEL: sext_16i8_to_8i32: 287; SSE41: # %bb.0: # %entry 288; SSE41-NEXT: pmovsxbd %xmm0, %xmm2 289; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 290; SSE41-NEXT: pmovsxbd %xmm0, %xmm1 291; SSE41-NEXT: movdqa %xmm2, %xmm0 292; SSE41-NEXT: retq 293; 294; AVX1-LABEL: sext_16i8_to_8i32: 295; AVX1: # %bb.0: # %entry 296; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 297; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 298; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 299; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 300; AVX1-NEXT: retq 301; 302; AVX2-LABEL: 
sext_16i8_to_8i32: 303; AVX2: # %bb.0: # %entry 304; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0 305; AVX2-NEXT: retq 306; 307; AVX512-LABEL: sext_16i8_to_8i32: 308; AVX512: # %bb.0: # %entry 309; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0 310; AVX512-NEXT: retq 311; 312; X86-SSE2-LABEL: sext_16i8_to_8i32: 313; X86-SSE2: # %bb.0: # %entry 314; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 315; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 316; X86-SSE2-NEXT: psrad $24, %xmm0 317; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 318; X86-SSE2-NEXT: psrad $24, %xmm1 319; X86-SSE2-NEXT: retl 320; 321; X86-SSE41-LABEL: sext_16i8_to_8i32: 322; X86-SSE41: # %bb.0: # %entry 323; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm2 324; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 325; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm1 326; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 327; X86-SSE41-NEXT: retl 328entry: 329 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 330 %C = sext <8 x i8> %B to <8 x i32> 331 ret <8 x i32> %C 332} 333 334define <16 x i32> @sext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp { 335; SSE2-LABEL: sext_16i8_to_16i32: 336; SSE2: # %bb.0: # %entry 337; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 338; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] 339; SSE2-NEXT: psrad $24, %xmm4 340; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 341; SSE2-NEXT: psrad $24, %xmm1 342; SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 343; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 344; SSE2-NEXT: psrad $24, %xmm2 345; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 346; SSE2-NEXT: psrad $24, %xmm3 347; SSE2-NEXT: movdqa %xmm4, %xmm0 348; SSE2-NEXT: retq 349; 350; SSSE3-LABEL: sext_16i8_to_16i32: 351; SSSE3: # %bb.0: # %entry 352; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 353; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] 354; SSSE3-NEXT: psrad $24, %xmm4 355; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 356; SSSE3-NEXT: psrad $24, %xmm1 357; SSSE3-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 358; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 359; SSSE3-NEXT: psrad $24, %xmm2 360; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 361; SSSE3-NEXT: psrad $24, %xmm3 362; SSSE3-NEXT: movdqa %xmm4, %xmm0 363; SSSE3-NEXT: retq 364; 365; SSE41-LABEL: sext_16i8_to_16i32: 366; SSE41: # %bb.0: # %entry 367; SSE41-NEXT: pmovsxbd %xmm0, %xmm4 368; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 369; SSE41-NEXT: pmovsxbd %xmm1, %xmm1 370; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 371; SSE41-NEXT: pmovsxbd %xmm2, %xmm2 372; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 373; SSE41-NEXT: pmovsxbd %xmm0, %xmm3 374; SSE41-NEXT: movdqa %xmm4, %xmm0 375; SSE41-NEXT: retq 376; 377; AVX1-LABEL: sext_16i8_to_16i32: 378; AVX1: # %bb.0: # %entry 379; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1 380; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 381; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2 382; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 383; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 384; AVX1-NEXT: vpmovsxbd 
%xmm1, %xmm1 385; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 386; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0 387; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 388; AVX1-NEXT: vmovaps %ymm2, %ymm0 389; AVX1-NEXT: retq 390; 391; AVX2-LABEL: sext_16i8_to_16i32: 392; AVX2: # %bb.0: # %entry 393; AVX2-NEXT: vpmovsxbd %xmm0, %ymm2 394; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 395; AVX2-NEXT: vpmovsxbd %xmm0, %ymm1 396; AVX2-NEXT: vmovdqa %ymm2, %ymm0 397; AVX2-NEXT: retq 398; 399; AVX512-LABEL: sext_16i8_to_16i32: 400; AVX512: # %bb.0: # %entry 401; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0 402; AVX512-NEXT: retq 403; 404; X86-SSE2-LABEL: sext_16i8_to_16i32: 405; X86-SSE2: # %bb.0: # %entry 406; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 407; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3] 408; X86-SSE2-NEXT: psrad $24, %xmm4 409; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 410; X86-SSE2-NEXT: psrad $24, %xmm1 411; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm0 = xmm0[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 412; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 413; X86-SSE2-NEXT: psrad $24, %xmm2 414; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 415; X86-SSE2-NEXT: psrad $24, %xmm3 416; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 417; X86-SSE2-NEXT: retl 418; 419; X86-SSE41-LABEL: sext_16i8_to_16i32: 420; X86-SSE41: # %bb.0: # %entry 421; X86-SSE41-NEXT: pmovsxbd %xmm0, %xmm4 422; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 423; X86-SSE41-NEXT: pmovsxbd %xmm1, %xmm1 424; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 425; X86-SSE41-NEXT: pmovsxbd %xmm2, %xmm2 426; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 427; X86-SSE41-NEXT: pmovsxbd %xmm0, 
%xmm3 428; X86-SSE41-NEXT: movdqa %xmm4, %xmm0 429; X86-SSE41-NEXT: retl 430entry: 431 %B = sext <16 x i8> %A to <16 x i32> 432 ret <16 x i32> %B 433} 434 435define <2 x i64> @sext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp { 436; SSE2-LABEL: sext_16i8_to_2i64: 437; SSE2: # %bb.0: # %entry 438; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 439; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 440; SSE2-NEXT: pxor %xmm1, %xmm1 441; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 442; SSE2-NEXT: psrad $24, %xmm0 443; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 444; SSE2-NEXT: retq 445; 446; SSSE3-LABEL: sext_16i8_to_2i64: 447; SSSE3: # %bb.0: # %entry 448; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 449; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 450; SSSE3-NEXT: pxor %xmm1, %xmm1 451; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 452; SSSE3-NEXT: psrad $24, %xmm0 453; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 454; SSSE3-NEXT: retq 455; 456; SSE41-LABEL: sext_16i8_to_2i64: 457; SSE41: # %bb.0: # %entry 458; SSE41-NEXT: pmovsxbq %xmm0, %xmm0 459; SSE41-NEXT: retq 460; 461; AVX-LABEL: sext_16i8_to_2i64: 462; AVX: # %bb.0: # %entry 463; AVX-NEXT: vpmovsxbq %xmm0, %xmm0 464; AVX-NEXT: retq 465; 466; X86-SSE2-LABEL: sext_16i8_to_2i64: 467; X86-SSE2: # %bb.0: # %entry 468; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 469; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 470; X86-SSE2-NEXT: pxor %xmm1, %xmm1 471; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 472; X86-SSE2-NEXT: psrad $24, %xmm0 473; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 474; X86-SSE2-NEXT: retl 475; 476; X86-SSE41-LABEL: sext_16i8_to_2i64: 477; X86-SSE41: # %bb.0: # %entry 478; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm0 479; X86-SSE41-NEXT: retl 480entry: 481 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x 
i32> <i32 0, i32 1> 482 %C = sext <2 x i8> %B to <2 x i64> 483 ret <2 x i64> %C 484} 485 486define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp { 487; SSE2-LABEL: sext_16i8_to_4i64: 488; SSE2: # %bb.0: # %entry 489; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 490; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 491; SSE2-NEXT: psrad $24, %xmm1 492; SSE2-NEXT: pxor %xmm2, %xmm2 493; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 494; SSE2-NEXT: movdqa %xmm1, %xmm0 495; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 496; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 497; SSE2-NEXT: retq 498; 499; SSSE3-LABEL: sext_16i8_to_4i64: 500; SSSE3: # %bb.0: # %entry 501; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 502; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 503; SSSE3-NEXT: psrad $24, %xmm1 504; SSSE3-NEXT: pxor %xmm2, %xmm2 505; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 506; SSSE3-NEXT: movdqa %xmm1, %xmm0 507; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 508; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 509; SSSE3-NEXT: retq 510; 511; SSE41-LABEL: sext_16i8_to_4i64: 512; SSE41: # %bb.0: # %entry 513; SSE41-NEXT: pmovsxbq %xmm0, %xmm2 514; SSE41-NEXT: psrld $16, %xmm0 515; SSE41-NEXT: pmovsxbq %xmm0, %xmm1 516; SSE41-NEXT: movdqa %xmm2, %xmm0 517; SSE41-NEXT: retq 518; 519; AVX1-LABEL: sext_16i8_to_4i64: 520; AVX1: # %bb.0: # %entry 521; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 522; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 523; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 524; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 525; AVX1-NEXT: retq 526; 527; AVX2-LABEL: sext_16i8_to_4i64: 528; AVX2: # %bb.0: # %entry 529; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0 530; AVX2-NEXT: retq 531; 532; AVX512-LABEL: sext_16i8_to_4i64: 533; 
AVX512: # %bb.0: # %entry 534; AVX512-NEXT: vpmovsxbq %xmm0, %ymm0 535; AVX512-NEXT: retq 536; 537; X86-SSE2-LABEL: sext_16i8_to_4i64: 538; X86-SSE2: # %bb.0: # %entry 539; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 540; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 541; X86-SSE2-NEXT: psrad $24, %xmm1 542; X86-SSE2-NEXT: pxor %xmm2, %xmm2 543; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 544; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 545; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 546; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 547; X86-SSE2-NEXT: retl 548; 549; X86-SSE41-LABEL: sext_16i8_to_4i64: 550; X86-SSE41: # %bb.0: # %entry 551; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm2 552; X86-SSE41-NEXT: psrld $16, %xmm0 553; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm1 554; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 555; X86-SSE41-NEXT: retl 556entry: 557 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 558 %C = sext <4 x i8> %B to <4 x i64> 559 ret <4 x i64> %C 560} 561 562define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp { 563; SSE2-LABEL: sext_16i8_to_8i64: 564; SSE2: # %bb.0: # %entry 565; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 566; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 567; SSE2-NEXT: psrad $24, %xmm1 568; SSE2-NEXT: pxor %xmm4, %xmm4 569; SSE2-NEXT: pxor %xmm3, %xmm3 570; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 571; SSE2-NEXT: movdqa %xmm1, %xmm0 572; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 573; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 574; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 
575; SSE2-NEXT: psrad $24, %xmm3 576; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 577; SSE2-NEXT: movdqa %xmm3, %xmm2 578; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 579; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 580; SSE2-NEXT: retq 581; 582; SSSE3-LABEL: sext_16i8_to_8i64: 583; SSSE3: # %bb.0: # %entry 584; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 585; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 586; SSSE3-NEXT: psrad $24, %xmm1 587; SSSE3-NEXT: pxor %xmm4, %xmm4 588; SSSE3-NEXT: pxor %xmm3, %xmm3 589; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 590; SSSE3-NEXT: movdqa %xmm1, %xmm0 591; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 592; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 593; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 594; SSSE3-NEXT: psrad $24, %xmm3 595; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 596; SSSE3-NEXT: movdqa %xmm3, %xmm2 597; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 598; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 599; SSSE3-NEXT: retq 600; 601; SSE41-LABEL: sext_16i8_to_8i64: 602; SSE41: # %bb.0: # %entry 603; SSE41-NEXT: pmovsxbq %xmm0, %xmm4 604; SSE41-NEXT: movdqa %xmm0, %xmm1 605; SSE41-NEXT: psrld $16, %xmm1 606; SSE41-NEXT: pmovsxbq %xmm1, %xmm1 607; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 608; SSE41-NEXT: pmovsxbq %xmm2, %xmm2 609; SSE41-NEXT: psrlq $48, %xmm0 610; SSE41-NEXT: pmovsxbq %xmm0, %xmm3 611; SSE41-NEXT: movdqa %xmm4, %xmm0 612; SSE41-NEXT: retq 613; 614; AVX1-LABEL: sext_16i8_to_8i64: 615; AVX1: # %bb.0: # %entry 616; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 617; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 618; AVX1-NEXT: vpmovsxbq %xmm2, %xmm2 619; AVX1-NEXT: 
vinsertf128 $1, %xmm2, %ymm1, %ymm2 620; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 621; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 622; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 623; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 624; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 625; AVX1-NEXT: vmovaps %ymm2, %ymm0 626; AVX1-NEXT: retq 627; 628; AVX2-LABEL: sext_16i8_to_8i64: 629; AVX2: # %bb.0: # %entry 630; AVX2-NEXT: vpmovsxbq %xmm0, %ymm2 631; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 632; AVX2-NEXT: vpmovsxbq %xmm0, %ymm1 633; AVX2-NEXT: vmovdqa %ymm2, %ymm0 634; AVX2-NEXT: retq 635; 636; AVX512-LABEL: sext_16i8_to_8i64: 637; AVX512: # %bb.0: # %entry 638; AVX512-NEXT: vpmovsxbq %xmm0, %zmm0 639; AVX512-NEXT: retq 640; 641; X86-SSE2-LABEL: sext_16i8_to_8i64: 642; X86-SSE2: # %bb.0: # %entry 643; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 644; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 645; X86-SSE2-NEXT: psrad $24, %xmm1 646; X86-SSE2-NEXT: pxor %xmm4, %xmm4 647; X86-SSE2-NEXT: pxor %xmm3, %xmm3 648; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 649; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 650; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 651; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 652; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 653; X86-SSE2-NEXT: psrad $24, %xmm3 654; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 655; X86-SSE2-NEXT: movdqa %xmm3, %xmm2 656; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 657; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 658; X86-SSE2-NEXT: retl 659; 660; X86-SSE41-LABEL: sext_16i8_to_8i64: 661; X86-SSE41: # %bb.0: # %entry 662; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm4 663; X86-SSE41-NEXT: movdqa %xmm0, 
%xmm1 664; X86-SSE41-NEXT: psrld $16, %xmm1 665; X86-SSE41-NEXT: pmovsxbq %xmm1, %xmm1 666; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 667; X86-SSE41-NEXT: pmovsxbq %xmm2, %xmm2 668; X86-SSE41-NEXT: psrlq $48, %xmm0 669; X86-SSE41-NEXT: pmovsxbq %xmm0, %xmm3 670; X86-SSE41-NEXT: movdqa %xmm4, %xmm0 671; X86-SSE41-NEXT: retl 672entry: 673 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 674 %C = sext <8 x i8> %B to <8 x i64> 675 ret <8 x i64> %C 676} 677 678define <4 x i32> @sext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp { 679; SSE2-LABEL: sext_8i16_to_4i32: 680; SSE2: # %bb.0: # %entry 681; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 682; SSE2-NEXT: psrad $16, %xmm0 683; SSE2-NEXT: retq 684; 685; SSSE3-LABEL: sext_8i16_to_4i32: 686; SSSE3: # %bb.0: # %entry 687; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 688; SSSE3-NEXT: psrad $16, %xmm0 689; SSSE3-NEXT: retq 690; 691; SSE41-LABEL: sext_8i16_to_4i32: 692; SSE41: # %bb.0: # %entry 693; SSE41-NEXT: pmovsxwd %xmm0, %xmm0 694; SSE41-NEXT: retq 695; 696; AVX-LABEL: sext_8i16_to_4i32: 697; AVX: # %bb.0: # %entry 698; AVX-NEXT: vpmovsxwd %xmm0, %xmm0 699; AVX-NEXT: retq 700; 701; X86-SSE2-LABEL: sext_8i16_to_4i32: 702; X86-SSE2: # %bb.0: # %entry 703; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 704; X86-SSE2-NEXT: psrad $16, %xmm0 705; X86-SSE2-NEXT: retl 706; 707; X86-SSE41-LABEL: sext_8i16_to_4i32: 708; X86-SSE41: # %bb.0: # %entry 709; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm0 710; X86-SSE41-NEXT: retl 711entry: 712 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 713 %C = sext <4 x i16> %B to <4 x i32> 714 ret <4 x i32> %C 715} 716 717define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 718; SSE2-LABEL: sext_8i16_to_8i32: 719; SSE2: # %bb.0: # %entry 720; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 721; SSE2-NEXT: psrad $16, %xmm2 722; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 723; SSE2-NEXT: psrad $16, %xmm1 724; SSE2-NEXT: movdqa %xmm2, %xmm0 725; SSE2-NEXT: retq 726; 727; SSSE3-LABEL: sext_8i16_to_8i32: 728; SSSE3: # %bb.0: # %entry 729; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 730; SSSE3-NEXT: psrad $16, %xmm2 731; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 732; SSSE3-NEXT: psrad $16, %xmm1 733; SSSE3-NEXT: movdqa %xmm2, %xmm0 734; SSSE3-NEXT: retq 735; 736; SSE41-LABEL: sext_8i16_to_8i32: 737; SSE41: # %bb.0: # %entry 738; SSE41-NEXT: pmovsxwd %xmm0, %xmm2 739; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 740; SSE41-NEXT: pmovsxwd %xmm0, %xmm1 741; SSE41-NEXT: movdqa %xmm2, %xmm0 742; SSE41-NEXT: retq 743; 744; AVX1-LABEL: sext_8i16_to_8i32: 745; AVX1: # %bb.0: # %entry 746; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 747; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 748; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 749; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 750; AVX1-NEXT: retq 751; 752; AVX2-LABEL: sext_8i16_to_8i32: 753; AVX2: # %bb.0: # %entry 754; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0 755; AVX2-NEXT: retq 756; 757; AVX512-LABEL: sext_8i16_to_8i32: 758; AVX512: # %bb.0: # %entry 759; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0 760; AVX512-NEXT: retq 761; 762; X86-SSE2-LABEL: sext_8i16_to_8i32: 763; X86-SSE2: # %bb.0: # %entry 764; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 765; X86-SSE2-NEXT: psrad $16, %xmm2 766; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 767; X86-SSE2-NEXT: psrad $16, %xmm1 768; X86-SSE2-NEXT: movdqa %xmm2, %xmm0 769; X86-SSE2-NEXT: retl 770; 771; X86-SSE41-LABEL: sext_8i16_to_8i32: 772; 
X86-SSE41: # %bb.0: # %entry 773; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm2 774; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 775; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm1 776; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 777; X86-SSE41-NEXT: retl 778entry: 779 %B = sext <8 x i16> %A to <8 x i32> 780 ret <8 x i32> %B 781} 782 783define <16 x i32> @sext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp { 784; SSE2-LABEL: sext_16i16_to_16i32: 785; SSE2: # %bb.0: # %entry 786; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 787; SSE2-NEXT: psrad $16, %xmm4 788; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] 789; SSE2-NEXT: psrad $16, %xmm5 790; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 791; SSE2-NEXT: psrad $16, %xmm2 792; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 793; SSE2-NEXT: psrad $16, %xmm3 794; SSE2-NEXT: movdqa %xmm4, %xmm0 795; SSE2-NEXT: movdqa %xmm5, %xmm1 796; SSE2-NEXT: retq 797; 798; SSSE3-LABEL: sext_16i16_to_16i32: 799; SSSE3: # %bb.0: # %entry 800; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 801; SSSE3-NEXT: psrad $16, %xmm4 802; SSSE3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] 803; SSSE3-NEXT: psrad $16, %xmm5 804; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 805; SSSE3-NEXT: psrad $16, %xmm2 806; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 807; SSSE3-NEXT: psrad $16, %xmm3 808; SSSE3-NEXT: movdqa %xmm4, %xmm0 809; SSSE3-NEXT: movdqa %xmm5, %xmm1 810; SSSE3-NEXT: retq 811; 812; SSE41-LABEL: sext_16i16_to_16i32: 813; SSE41: # %bb.0: # %entry 814; SSE41-NEXT: pmovsxwd %xmm0, %xmm5 815; SSE41-NEXT: pmovsxwd 
%xmm1, %xmm2 816; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 817; SSE41-NEXT: pmovsxwd %xmm0, %xmm4 818; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 819; SSE41-NEXT: pmovsxwd %xmm0, %xmm3 820; SSE41-NEXT: movdqa %xmm5, %xmm0 821; SSE41-NEXT: movdqa %xmm4, %xmm1 822; SSE41-NEXT: retq 823; 824; AVX1-LABEL: sext_16i16_to_16i32: 825; AVX1: # %bb.0: # %entry 826; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 827; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 828; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2 829; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 830; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 831; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 832; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 833; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 834; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 835; AVX1-NEXT: vmovaps %ymm2, %ymm0 836; AVX1-NEXT: retq 837; 838; AVX2-LABEL: sext_16i16_to_16i32: 839; AVX2: # %bb.0: # %entry 840; AVX2-NEXT: vpmovsxwd %xmm0, %ymm2 841; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 842; AVX2-NEXT: vpmovsxwd %xmm0, %ymm1 843; AVX2-NEXT: vmovdqa %ymm2, %ymm0 844; AVX2-NEXT: retq 845; 846; AVX512-LABEL: sext_16i16_to_16i32: 847; AVX512: # %bb.0: # %entry 848; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0 849; AVX512-NEXT: retq 850; 851; X86-SSE2-LABEL: sext_16i16_to_16i32: 852; X86-SSE2: # %bb.0: # %entry 853; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 854; X86-SSE2-NEXT: psrad $16, %xmm4 855; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7] 856; X86-SSE2-NEXT: psrad $16, %xmm5 857; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 858; X86-SSE2-NEXT: psrad $16, %xmm2 859; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 860; X86-SSE2-NEXT: psrad $16, %xmm3 861; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 862; X86-SSE2-NEXT: movdqa %xmm5, %xmm1 863; 
X86-SSE2-NEXT: retl 864; 865; X86-SSE41-LABEL: sext_16i16_to_16i32: 866; X86-SSE41: # %bb.0: # %entry 867; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm5 868; X86-SSE41-NEXT: pmovsxwd %xmm1, %xmm2 869; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 870; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm4 871; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 872; X86-SSE41-NEXT: pmovsxwd %xmm0, %xmm3 873; X86-SSE41-NEXT: movdqa %xmm5, %xmm0 874; X86-SSE41-NEXT: movdqa %xmm4, %xmm1 875; X86-SSE41-NEXT: retl 876entry: 877 %B = sext <16 x i16> %A to <16 x i32> 878 ret <16 x i32> %B 879} 880 881define <2 x i64> @sext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp { 882; SSE2-LABEL: sext_8i16_to_2i64: 883; SSE2: # %bb.0: # %entry 884; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 885; SSE2-NEXT: pxor %xmm1, %xmm1 886; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 887; SSE2-NEXT: psrad $16, %xmm0 888; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 889; SSE2-NEXT: retq 890; 891; SSSE3-LABEL: sext_8i16_to_2i64: 892; SSSE3: # %bb.0: # %entry 893; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 894; SSSE3-NEXT: pxor %xmm1, %xmm1 895; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 896; SSSE3-NEXT: psrad $16, %xmm0 897; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 898; SSSE3-NEXT: retq 899; 900; SSE41-LABEL: sext_8i16_to_2i64: 901; SSE41: # %bb.0: # %entry 902; SSE41-NEXT: pmovsxwq %xmm0, %xmm0 903; SSE41-NEXT: retq 904; 905; AVX-LABEL: sext_8i16_to_2i64: 906; AVX: # %bb.0: # %entry 907; AVX-NEXT: vpmovsxwq %xmm0, %xmm0 908; AVX-NEXT: retq 909; 910; X86-SSE2-LABEL: sext_8i16_to_2i64: 911; X86-SSE2: # %bb.0: # %entry 912; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 913; X86-SSE2-NEXT: pxor %xmm1, %xmm1 914; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 915; X86-SSE2-NEXT: psrad $16, %xmm0 916; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 917; X86-SSE2-NEXT: retl 918; 919; X86-SSE41-LABEL: 
sext_8i16_to_2i64: 920; X86-SSE41: # %bb.0: # %entry 921; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm0 922; X86-SSE41-NEXT: retl 923entry: 924 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 925 %C = sext <2 x i16> %B to <2 x i64> 926 ret <2 x i64> %C 927} 928 929define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp { 930; SSE2-LABEL: sext_8i16_to_4i64: 931; SSE2: # %bb.0: # %entry 932; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 933; SSE2-NEXT: psrad $16, %xmm1 934; SSE2-NEXT: pxor %xmm2, %xmm2 935; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 936; SSE2-NEXT: movdqa %xmm1, %xmm0 937; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 938; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 939; SSE2-NEXT: retq 940; 941; SSSE3-LABEL: sext_8i16_to_4i64: 942; SSSE3: # %bb.0: # %entry 943; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 944; SSSE3-NEXT: psrad $16, %xmm1 945; SSSE3-NEXT: pxor %xmm2, %xmm2 946; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 947; SSSE3-NEXT: movdqa %xmm1, %xmm0 948; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 949; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 950; SSSE3-NEXT: retq 951; 952; SSE41-LABEL: sext_8i16_to_4i64: 953; SSE41: # %bb.0: # %entry 954; SSE41-NEXT: pmovsxwq %xmm0, %xmm2 955; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 956; SSE41-NEXT: pmovsxwq %xmm0, %xmm1 957; SSE41-NEXT: movdqa %xmm2, %xmm0 958; SSE41-NEXT: retq 959; 960; AVX1-LABEL: sext_8i16_to_4i64: 961; AVX1: # %bb.0: # %entry 962; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 963; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 964; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 965; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 966; AVX1-NEXT: retq 967; 968; AVX2-LABEL: sext_8i16_to_4i64: 969; AVX2: # %bb.0: # %entry 970; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0 971; 
AVX2-NEXT: retq 972; 973; AVX512-LABEL: sext_8i16_to_4i64: 974; AVX512: # %bb.0: # %entry 975; AVX512-NEXT: vpmovsxwq %xmm0, %ymm0 976; AVX512-NEXT: retq 977; 978; X86-SSE2-LABEL: sext_8i16_to_4i64: 979; X86-SSE2: # %bb.0: # %entry 980; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 981; X86-SSE2-NEXT: psrad $16, %xmm1 982; X86-SSE2-NEXT: pxor %xmm2, %xmm2 983; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 984; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 985; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 986; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 987; X86-SSE2-NEXT: retl 988; 989; X86-SSE41-LABEL: sext_8i16_to_4i64: 990; X86-SSE41: # %bb.0: # %entry 991; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm2 992; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1] 993; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm1 994; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 995; X86-SSE41-NEXT: retl 996entry: 997 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 998 %C = sext <4 x i16> %B to <4 x i64> 999 ret <4 x i64> %C 1000} 1001 1002define <8 x i64> @sext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp { 1003; SSE2-LABEL: sext_8i16_to_8i64: 1004; SSE2: # %bb.0: # %entry 1005; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1006; SSE2-NEXT: psrad $16, %xmm1 1007; SSE2-NEXT: pxor %xmm5, %xmm5 1008; SSE2-NEXT: pxor %xmm2, %xmm2 1009; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1010; SSE2-NEXT: movdqa %xmm1, %xmm4 1011; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 1012; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1013; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 1014; SSE2-NEXT: psrad $16, %xmm3 1015; SSE2-NEXT: pcmpgtd %xmm3, %xmm5 1016; SSE2-NEXT: movdqa %xmm3, %xmm2 1017; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = 
xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1018; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] 1019; SSE2-NEXT: movdqa %xmm4, %xmm0 1020; SSE2-NEXT: retq 1021; 1022; SSSE3-LABEL: sext_8i16_to_8i64: 1023; SSSE3: # %bb.0: # %entry 1024; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1025; SSSE3-NEXT: psrad $16, %xmm1 1026; SSSE3-NEXT: pxor %xmm5, %xmm5 1027; SSSE3-NEXT: pxor %xmm2, %xmm2 1028; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 1029; SSSE3-NEXT: movdqa %xmm1, %xmm4 1030; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 1031; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1032; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 1033; SSSE3-NEXT: psrad $16, %xmm3 1034; SSSE3-NEXT: pcmpgtd %xmm3, %xmm5 1035; SSSE3-NEXT: movdqa %xmm3, %xmm2 1036; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1037; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] 1038; SSSE3-NEXT: movdqa %xmm4, %xmm0 1039; SSSE3-NEXT: retq 1040; 1041; SSE41-LABEL: sext_8i16_to_8i64: 1042; SSE41: # %bb.0: # %entry 1043; SSE41-NEXT: pmovsxwq %xmm0, %xmm4 1044; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1045; SSE41-NEXT: pmovsxwq %xmm1, %xmm1 1046; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 1047; SSE41-NEXT: pmovsxwq %xmm2, %xmm2 1048; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 1049; SSE41-NEXT: pmovsxwq %xmm0, %xmm3 1050; SSE41-NEXT: movdqa %xmm4, %xmm0 1051; SSE41-NEXT: retq 1052; 1053; AVX1-LABEL: sext_8i16_to_8i64: 1054; AVX1: # %bb.0: # %entry 1055; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 1056; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,1,1] 1057; AVX1-NEXT: vpmovsxwq %xmm2, %xmm2 1058; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 1059; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1060; AVX1-NEXT: vpmovsxwq %xmm1, %xmm1 1061; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 1062; 
AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 1063; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 1064; AVX1-NEXT: vmovaps %ymm2, %ymm0 1065; AVX1-NEXT: retq 1066; 1067; AVX2-LABEL: sext_8i16_to_8i64: 1068; AVX2: # %bb.0: # %entry 1069; AVX2-NEXT: vpmovsxwq %xmm0, %ymm2 1070; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1071; AVX2-NEXT: vpmovsxwq %xmm0, %ymm1 1072; AVX2-NEXT: vmovdqa %ymm2, %ymm0 1073; AVX2-NEXT: retq 1074; 1075; AVX512-LABEL: sext_8i16_to_8i64: 1076; AVX512: # %bb.0: # %entry 1077; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0 1078; AVX512-NEXT: retq 1079; 1080; X86-SSE2-LABEL: sext_8i16_to_8i64: 1081; X86-SSE2: # %bb.0: # %entry 1082; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1083; X86-SSE2-NEXT: psrad $16, %xmm1 1084; X86-SSE2-NEXT: pxor %xmm5, %xmm5 1085; X86-SSE2-NEXT: pxor %xmm2, %xmm2 1086; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1087; X86-SSE2-NEXT: movdqa %xmm1, %xmm4 1088; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1] 1089; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1090; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 1091; X86-SSE2-NEXT: psrad $16, %xmm3 1092; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm5 1093; X86-SSE2-NEXT: movdqa %xmm3, %xmm2 1094; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1095; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm5[2],xmm3[3],xmm5[3] 1096; X86-SSE2-NEXT: movdqa %xmm4, %xmm0 1097; X86-SSE2-NEXT: retl 1098; 1099; X86-SSE41-LABEL: sext_8i16_to_8i64: 1100; X86-SSE41: # %bb.0: # %entry 1101; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm4 1102; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,1,1] 1103; X86-SSE41-NEXT: pmovsxwq %xmm1, %xmm1 1104; X86-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 1105; X86-SSE41-NEXT: pmovsxwq %xmm2, %xmm2 1106; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,3,3,3] 1107; X86-SSE41-NEXT: pmovsxwq %xmm0, %xmm3 
1108; X86-SSE41-NEXT: movdqa %xmm4, %xmm0 1109; X86-SSE41-NEXT: retl 1110entry: 1111 %B = sext <8 x i16> %A to <8 x i64> 1112 ret <8 x i64> %B 1113} 1114 1115define <2 x i64> @sext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1116; SSE2-LABEL: sext_4i32_to_2i64: 1117; SSE2: # %bb.0: # %entry 1118; SSE2-NEXT: pxor %xmm1, %xmm1 1119; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1120; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1121; SSE2-NEXT: retq 1122; 1123; SSSE3-LABEL: sext_4i32_to_2i64: 1124; SSSE3: # %bb.0: # %entry 1125; SSSE3-NEXT: pxor %xmm1, %xmm1 1126; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 1127; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1128; SSSE3-NEXT: retq 1129; 1130; SSE41-LABEL: sext_4i32_to_2i64: 1131; SSE41: # %bb.0: # %entry 1132; SSE41-NEXT: pmovsxdq %xmm0, %xmm0 1133; SSE41-NEXT: retq 1134; 1135; AVX-LABEL: sext_4i32_to_2i64: 1136; AVX: # %bb.0: # %entry 1137; AVX-NEXT: vpmovsxdq %xmm0, %xmm0 1138; AVX-NEXT: retq 1139; 1140; X86-SSE2-LABEL: sext_4i32_to_2i64: 1141; X86-SSE2: # %bb.0: # %entry 1142; X86-SSE2-NEXT: pxor %xmm1, %xmm1 1143; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1144; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1145; X86-SSE2-NEXT: retl 1146; 1147; X86-SSE41-LABEL: sext_4i32_to_2i64: 1148; X86-SSE41: # %bb.0: # %entry 1149; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm0 1150; X86-SSE41-NEXT: retl 1151entry: 1152 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 1153 %C = sext <2 x i32> %B to <2 x i64> 1154 ret <2 x i64> %C 1155} 1156 1157define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1158; SSE2-LABEL: sext_4i32_to_4i64: 1159; SSE2: # %bb.0: # %entry 1160; SSE2-NEXT: pxor %xmm2, %xmm2 1161; SSE2-NEXT: pxor %xmm3, %xmm3 1162; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1163; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1164; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1165; SSE2-NEXT: pcmpgtd 
%xmm1, %xmm2 1166; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1167; SSE2-NEXT: retq 1168; 1169; SSSE3-LABEL: sext_4i32_to_4i64: 1170; SSSE3: # %bb.0: # %entry 1171; SSSE3-NEXT: pxor %xmm2, %xmm2 1172; SSSE3-NEXT: pxor %xmm3, %xmm3 1173; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 1174; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1175; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1176; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 1177; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1178; SSSE3-NEXT: retq 1179; 1180; SSE41-LABEL: sext_4i32_to_4i64: 1181; SSE41: # %bb.0: # %entry 1182; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 1183; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1184; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 1185; SSE41-NEXT: movdqa %xmm2, %xmm0 1186; SSE41-NEXT: retq 1187; 1188; AVX1-LABEL: sext_4i32_to_4i64: 1189; AVX1: # %bb.0: # %entry 1190; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1191; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1192; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1193; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1194; AVX1-NEXT: retq 1195; 1196; AVX2-LABEL: sext_4i32_to_4i64: 1197; AVX2: # %bb.0: # %entry 1198; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 1199; AVX2-NEXT: retq 1200; 1201; AVX512-LABEL: sext_4i32_to_4i64: 1202; AVX512: # %bb.0: # %entry 1203; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0 1204; AVX512-NEXT: retq 1205; 1206; X86-SSE2-LABEL: sext_4i32_to_4i64: 1207; X86-SSE2: # %bb.0: # %entry 1208; X86-SSE2-NEXT: pxor %xmm2, %xmm2 1209; X86-SSE2-NEXT: pxor %xmm3, %xmm3 1210; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1211; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1212; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1213; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1214; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1215; X86-SSE2-NEXT: retl 1216; 1217; X86-SSE41-LABEL: sext_4i32_to_4i64: 1218; X86-SSE41: # %bb.0: # %entry 1219; X86-SSE41-NEXT: pmovsxdq 
%xmm0, %xmm2 1220; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1221; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 1222; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 1223; X86-SSE41-NEXT: retl 1224entry: 1225 %B = sext <4 x i32> %A to <4 x i64> 1226 ret <4 x i64> %B 1227} 1228 1229define <8 x i64> @sext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp { 1230; SSE2-LABEL: sext_8i32_to_8i64: 1231; SSE2: # %bb.0: # %entry 1232; SSE2-NEXT: movdqa %xmm1, %xmm2 1233; SSE2-NEXT: pxor %xmm4, %xmm4 1234; SSE2-NEXT: pxor %xmm3, %xmm3 1235; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1236; SSE2-NEXT: pxor %xmm5, %xmm5 1237; SSE2-NEXT: pcmpgtd %xmm1, %xmm5 1238; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1239; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1240; SSE2-NEXT: pxor %xmm3, %xmm3 1241; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 1242; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1243; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] 1244; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1245; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1246; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 1247; SSE2-NEXT: retq 1248; 1249; SSSE3-LABEL: sext_8i32_to_8i64: 1250; SSSE3: # %bb.0: # %entry 1251; SSSE3-NEXT: movdqa %xmm1, %xmm2 1252; SSSE3-NEXT: pxor %xmm4, %xmm4 1253; SSSE3-NEXT: pxor %xmm3, %xmm3 1254; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 1255; SSSE3-NEXT: pxor %xmm5, %xmm5 1256; SSSE3-NEXT: pcmpgtd %xmm1, %xmm5 1257; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1258; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1259; SSSE3-NEXT: pxor %xmm3, %xmm3 1260; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 1261; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1262; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] 1263; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1264; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 1265; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = 
xmm3[0],xmm4[0],xmm3[1],xmm4[1] 1266; SSSE3-NEXT: retq 1267; 1268; SSE41-LABEL: sext_8i32_to_8i64: 1269; SSE41: # %bb.0: # %entry 1270; SSE41-NEXT: pmovsxdq %xmm0, %xmm5 1271; SSE41-NEXT: pmovsxdq %xmm1, %xmm2 1272; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1273; SSE41-NEXT: pmovsxdq %xmm0, %xmm4 1274; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1275; SSE41-NEXT: pmovsxdq %xmm0, %xmm3 1276; SSE41-NEXT: movdqa %xmm5, %xmm0 1277; SSE41-NEXT: movdqa %xmm4, %xmm1 1278; SSE41-NEXT: retq 1279; 1280; AVX1-LABEL: sext_8i32_to_8i64: 1281; AVX1: # %bb.0: # %entry 1282; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1283; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,2,3] 1284; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2 1285; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 1286; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1287; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1288; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1289; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1290; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 1291; AVX1-NEXT: vmovaps %ymm2, %ymm0 1292; AVX1-NEXT: retq 1293; 1294; AVX2-LABEL: sext_8i32_to_8i64: 1295; AVX2: # %bb.0: # %entry 1296; AVX2-NEXT: vpmovsxdq %xmm0, %ymm2 1297; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1298; AVX2-NEXT: vpmovsxdq %xmm0, %ymm1 1299; AVX2-NEXT: vmovdqa %ymm2, %ymm0 1300; AVX2-NEXT: retq 1301; 1302; AVX512-LABEL: sext_8i32_to_8i64: 1303; AVX512: # %bb.0: # %entry 1304; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0 1305; AVX512-NEXT: retq 1306; 1307; X86-SSE2-LABEL: sext_8i32_to_8i64: 1308; X86-SSE2: # %bb.0: # %entry 1309; X86-SSE2-NEXT: movdqa %xmm1, %xmm2 1310; X86-SSE2-NEXT: pxor %xmm4, %xmm4 1311; X86-SSE2-NEXT: pxor %xmm3, %xmm3 1312; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 1313; X86-SSE2-NEXT: pxor %xmm5, %xmm5 1314; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm5 1315; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 1316; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1317; X86-SSE2-NEXT: pxor %xmm3, %xmm3 1318; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 
1319; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1320; X86-SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,2,3] 1321; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm5[0],xmm2[1],xmm5[1] 1322; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 1323; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 1324; X86-SSE2-NEXT: retl 1325; 1326; X86-SSE41-LABEL: sext_8i32_to_8i64: 1327; X86-SSE41: # %bb.0: # %entry 1328; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm5 1329; X86-SSE41-NEXT: pmovsxdq %xmm1, %xmm2 1330; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1331; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm4 1332; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,2,3] 1333; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm3 1334; X86-SSE41-NEXT: movdqa %xmm5, %xmm0 1335; X86-SSE41-NEXT: movdqa %xmm4, %xmm1 1336; X86-SSE41-NEXT: retl 1337entry: 1338 %B = sext <8 x i32> %A to <8 x i64> 1339 ret <8 x i64> %B 1340} 1341 1342define <2 x i64> @load_sext_2i1_to_2i64(ptr%ptr) { 1343; SSE-LABEL: load_sext_2i1_to_2i64: 1344; SSE: # %bb.0: # %entry 1345; SSE-NEXT: movzbl (%rdi), %eax 1346; SSE-NEXT: movzbl %al, %ecx 1347; SSE-NEXT: shrb %al 1348; SSE-NEXT: movzbl %al, %eax 1349; SSE-NEXT: negq %rax 1350; SSE-NEXT: movq %rax, %xmm1 1351; SSE-NEXT: andl $1, %ecx 1352; SSE-NEXT: negq %rcx 1353; SSE-NEXT: movq %rcx, %xmm0 1354; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1355; SSE-NEXT: retq 1356; 1357; AVX1-LABEL: load_sext_2i1_to_2i64: 1358; AVX1: # %bb.0: # %entry 1359; AVX1-NEXT: movzbl (%rdi), %eax 1360; AVX1-NEXT: movzbl %al, %ecx 1361; AVX1-NEXT: shrb %al 1362; AVX1-NEXT: movzbl %al, %eax 1363; AVX1-NEXT: negq %rax 1364; AVX1-NEXT: vmovq %rax, %xmm0 1365; AVX1-NEXT: andl $1, %ecx 1366; AVX1-NEXT: negq %rcx 1367; AVX1-NEXT: vmovq %rcx, %xmm1 1368; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1369; AVX1-NEXT: retq 1370; 1371; AVX2-LABEL: load_sext_2i1_to_2i64: 1372; AVX2: # %bb.0: # %entry 1373; AVX2-NEXT: movzbl (%rdi), %eax 1374; AVX2-NEXT: movzbl 
%al, %ecx 1375; AVX2-NEXT: shrb %al 1376; AVX2-NEXT: movzbl %al, %eax 1377; AVX2-NEXT: negq %rax 1378; AVX2-NEXT: vmovq %rax, %xmm0 1379; AVX2-NEXT: andl $1, %ecx 1380; AVX2-NEXT: negq %rcx 1381; AVX2-NEXT: vmovq %rcx, %xmm1 1382; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1383; AVX2-NEXT: retq 1384; 1385; AVX512F-LABEL: load_sext_2i1_to_2i64: 1386; AVX512F: # %bb.0: # %entry 1387; AVX512F-NEXT: movzbl (%rdi), %eax 1388; AVX512F-NEXT: kmovw %eax, %k1 1389; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1390; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1391; AVX512F-NEXT: vzeroupper 1392; AVX512F-NEXT: retq 1393; 1394; AVX512BW-LABEL: load_sext_2i1_to_2i64: 1395; AVX512BW: # %bb.0: # %entry 1396; AVX512BW-NEXT: movzbl (%rdi), %eax 1397; AVX512BW-NEXT: kmovd %eax, %k1 1398; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1399; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1400; AVX512BW-NEXT: vzeroupper 1401; AVX512BW-NEXT: retq 1402; 1403; X86-SSE2-LABEL: load_sext_2i1_to_2i64: 1404; X86-SSE2: # %bb.0: # %entry 1405; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1406; X86-SSE2-NEXT: movzbl (%eax), %eax 1407; X86-SSE2-NEXT: movzbl %al, %ecx 1408; X86-SSE2-NEXT: shrb %al 1409; X86-SSE2-NEXT: movzbl %al, %eax 1410; X86-SSE2-NEXT: negl %eax 1411; X86-SSE2-NEXT: movd %eax, %xmm0 1412; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,1,1] 1413; X86-SSE2-NEXT: andl $1, %ecx 1414; X86-SSE2-NEXT: negl %ecx 1415; X86-SSE2-NEXT: movd %ecx, %xmm0 1416; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 1417; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1418; X86-SSE2-NEXT: retl 1419; 1420; X86-SSE41-LABEL: load_sext_2i1_to_2i64: 1421; X86-SSE41: # %bb.0: # %entry 1422; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1423; X86-SSE41-NEXT: movzbl (%eax), %eax 1424; X86-SSE41-NEXT: movzbl %al, %ecx 1425; X86-SSE41-NEXT: andl $1, %ecx 1426; X86-SSE41-NEXT: negl %ecx 1427; X86-SSE41-NEXT: movd %ecx, %xmm0 1428; 
X86-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 1429; X86-SSE41-NEXT: shrb %al 1430; X86-SSE41-NEXT: movzbl %al, %eax 1431; X86-SSE41-NEXT: negl %eax 1432; X86-SSE41-NEXT: pinsrd $2, %eax, %xmm0 1433; X86-SSE41-NEXT: pinsrd $3, %eax, %xmm0 1434; X86-SSE41-NEXT: retl 1435entry: 1436 %X = load <2 x i1>, ptr %ptr 1437 %Y = sext <2 x i1> %X to <2 x i64> 1438 ret <2 x i64> %Y 1439} 1440 1441define <2 x i64> @load_sext_2i8_to_2i64(ptr%ptr) { 1442; SSE2-LABEL: load_sext_2i8_to_2i64: 1443; SSE2: # %bb.0: # %entry 1444; SSE2-NEXT: movzwl (%rdi), %eax 1445; SSE2-NEXT: movd %eax, %xmm0 1446; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1447; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1448; SSE2-NEXT: pxor %xmm1, %xmm1 1449; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1450; SSE2-NEXT: psrad $24, %xmm0 1451; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1452; SSE2-NEXT: retq 1453; 1454; SSSE3-LABEL: load_sext_2i8_to_2i64: 1455; SSSE3: # %bb.0: # %entry 1456; SSSE3-NEXT: movzwl (%rdi), %eax 1457; SSSE3-NEXT: movd %eax, %xmm0 1458; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1459; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1460; SSSE3-NEXT: pxor %xmm1, %xmm1 1461; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 1462; SSSE3-NEXT: psrad $24, %xmm0 1463; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1464; SSSE3-NEXT: retq 1465; 1466; SSE41-LABEL: load_sext_2i8_to_2i64: 1467; SSE41: # %bb.0: # %entry 1468; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 1469; SSE41-NEXT: retq 1470; 1471; AVX-LABEL: load_sext_2i8_to_2i64: 1472; AVX: # %bb.0: # %entry 1473; AVX-NEXT: vpmovsxbq (%rdi), %xmm0 1474; AVX-NEXT: retq 1475; 1476; X86-SSE2-LABEL: load_sext_2i8_to_2i64: 1477; X86-SSE2: # %bb.0: # %entry 1478; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1479; X86-SSE2-NEXT: movzwl (%eax), %eax 1480; X86-SSE2-NEXT: movd %eax, %xmm0 1481; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = 
xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1482; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1483; X86-SSE2-NEXT: pxor %xmm1, %xmm1 1484; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 1485; X86-SSE2-NEXT: psrad $24, %xmm0 1486; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1487; X86-SSE2-NEXT: retl 1488; 1489; X86-SSE41-LABEL: load_sext_2i8_to_2i64: 1490; X86-SSE41: # %bb.0: # %entry 1491; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1492; X86-SSE41-NEXT: pmovsxbq (%eax), %xmm0 1493; X86-SSE41-NEXT: retl 1494entry: 1495 %X = load <2 x i8>, ptr %ptr 1496 %Y = sext <2 x i8> %X to <2 x i64> 1497 ret <2 x i64> %Y 1498} 1499 1500define <4 x i32> @load_sext_4i1_to_4i32(ptr%ptr) { 1501; SSE2-LABEL: load_sext_4i1_to_4i32: 1502; SSE2: # %bb.0: # %entry 1503; SSE2-NEXT: movzbl (%rdi), %eax 1504; SSE2-NEXT: movl %eax, %ecx 1505; SSE2-NEXT: shrb $3, %cl 1506; SSE2-NEXT: movzbl %cl, %ecx 1507; SSE2-NEXT: negl %ecx 1508; SSE2-NEXT: movd %ecx, %xmm0 1509; SSE2-NEXT: movzbl %al, %ecx 1510; SSE2-NEXT: shrb $2, %al 1511; SSE2-NEXT: movzbl %al, %eax 1512; SSE2-NEXT: andl $1, %eax 1513; SSE2-NEXT: negl %eax 1514; SSE2-NEXT: movd %eax, %xmm1 1515; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1516; SSE2-NEXT: movl %ecx, %eax 1517; SSE2-NEXT: andl $1, %eax 1518; SSE2-NEXT: negl %eax 1519; SSE2-NEXT: movd %eax, %xmm0 1520; SSE2-NEXT: shrb %cl 1521; SSE2-NEXT: movzbl %cl, %eax 1522; SSE2-NEXT: andl $1, %eax 1523; SSE2-NEXT: negl %eax 1524; SSE2-NEXT: movd %eax, %xmm2 1525; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1526; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1527; SSE2-NEXT: retq 1528; 1529; SSSE3-LABEL: load_sext_4i1_to_4i32: 1530; SSSE3: # %bb.0: # %entry 1531; SSSE3-NEXT: movzbl (%rdi), %eax 1532; SSSE3-NEXT: movl %eax, %ecx 1533; SSSE3-NEXT: shrb $3, %cl 1534; SSSE3-NEXT: movzbl %cl, %ecx 1535; SSSE3-NEXT: negl %ecx 1536; SSSE3-NEXT: movd %ecx, %xmm0 1537; SSSE3-NEXT: movzbl %al, %ecx 1538; 
SSSE3-NEXT: shrb $2, %al 1539; SSSE3-NEXT: movzbl %al, %eax 1540; SSSE3-NEXT: andl $1, %eax 1541; SSSE3-NEXT: negl %eax 1542; SSSE3-NEXT: movd %eax, %xmm1 1543; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1544; SSSE3-NEXT: movl %ecx, %eax 1545; SSSE3-NEXT: andl $1, %eax 1546; SSSE3-NEXT: negl %eax 1547; SSSE3-NEXT: movd %eax, %xmm0 1548; SSSE3-NEXT: shrb %cl 1549; SSSE3-NEXT: movzbl %cl, %eax 1550; SSSE3-NEXT: andl $1, %eax 1551; SSSE3-NEXT: negl %eax 1552; SSSE3-NEXT: movd %eax, %xmm2 1553; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1554; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1555; SSSE3-NEXT: retq 1556; 1557; SSE41-LABEL: load_sext_4i1_to_4i32: 1558; SSE41: # %bb.0: # %entry 1559; SSE41-NEXT: movzbl (%rdi), %eax 1560; SSE41-NEXT: movzbl %al, %ecx 1561; SSE41-NEXT: shrb %al 1562; SSE41-NEXT: movzbl %al, %eax 1563; SSE41-NEXT: andl $1, %eax 1564; SSE41-NEXT: negl %eax 1565; SSE41-NEXT: movl %ecx, %edx 1566; SSE41-NEXT: andl $1, %edx 1567; SSE41-NEXT: negl %edx 1568; SSE41-NEXT: movd %edx, %xmm0 1569; SSE41-NEXT: pinsrd $1, %eax, %xmm0 1570; SSE41-NEXT: movl %ecx, %eax 1571; SSE41-NEXT: shrb $2, %al 1572; SSE41-NEXT: movzbl %al, %eax 1573; SSE41-NEXT: andl $1, %eax 1574; SSE41-NEXT: negl %eax 1575; SSE41-NEXT: pinsrd $2, %eax, %xmm0 1576; SSE41-NEXT: shrb $3, %cl 1577; SSE41-NEXT: movzbl %cl, %eax 1578; SSE41-NEXT: negl %eax 1579; SSE41-NEXT: pinsrd $3, %eax, %xmm0 1580; SSE41-NEXT: retq 1581; 1582; AVX1-LABEL: load_sext_4i1_to_4i32: 1583; AVX1: # %bb.0: # %entry 1584; AVX1-NEXT: movzbl (%rdi), %eax 1585; AVX1-NEXT: movzbl %al, %ecx 1586; AVX1-NEXT: shrb %al 1587; AVX1-NEXT: movzbl %al, %eax 1588; AVX1-NEXT: andl $1, %eax 1589; AVX1-NEXT: negl %eax 1590; AVX1-NEXT: movl %ecx, %edx 1591; AVX1-NEXT: andl $1, %edx 1592; AVX1-NEXT: negl %edx 1593; AVX1-NEXT: vmovd %edx, %xmm0 1594; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 1595; AVX1-NEXT: movl %ecx, %eax 1596; AVX1-NEXT: shrb $2, %al 1597; AVX1-NEXT: 
movzbl %al, %eax 1598; AVX1-NEXT: andl $1, %eax 1599; AVX1-NEXT: negl %eax 1600; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 1601; AVX1-NEXT: shrb $3, %cl 1602; AVX1-NEXT: movzbl %cl, %eax 1603; AVX1-NEXT: negl %eax 1604; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1605; AVX1-NEXT: retq 1606; 1607; AVX2-LABEL: load_sext_4i1_to_4i32: 1608; AVX2: # %bb.0: # %entry 1609; AVX2-NEXT: movzbl (%rdi), %eax 1610; AVX2-NEXT: movzbl %al, %ecx 1611; AVX2-NEXT: shrb %al 1612; AVX2-NEXT: movzbl %al, %eax 1613; AVX2-NEXT: andl $1, %eax 1614; AVX2-NEXT: negl %eax 1615; AVX2-NEXT: movl %ecx, %edx 1616; AVX2-NEXT: andl $1, %edx 1617; AVX2-NEXT: negl %edx 1618; AVX2-NEXT: vmovd %edx, %xmm0 1619; AVX2-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 1620; AVX2-NEXT: movl %ecx, %eax 1621; AVX2-NEXT: shrb $2, %al 1622; AVX2-NEXT: movzbl %al, %eax 1623; AVX2-NEXT: andl $1, %eax 1624; AVX2-NEXT: negl %eax 1625; AVX2-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 1626; AVX2-NEXT: shrb $3, %cl 1627; AVX2-NEXT: movzbl %cl, %eax 1628; AVX2-NEXT: negl %eax 1629; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1630; AVX2-NEXT: retq 1631; 1632; AVX512F-LABEL: load_sext_4i1_to_4i32: 1633; AVX512F: # %bb.0: # %entry 1634; AVX512F-NEXT: movzbl (%rdi), %eax 1635; AVX512F-NEXT: kmovw %eax, %k1 1636; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1637; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1638; AVX512F-NEXT: vzeroupper 1639; AVX512F-NEXT: retq 1640; 1641; AVX512BW-LABEL: load_sext_4i1_to_4i32: 1642; AVX512BW: # %bb.0: # %entry 1643; AVX512BW-NEXT: movzbl (%rdi), %eax 1644; AVX512BW-NEXT: kmovd %eax, %k1 1645; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1646; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 1647; AVX512BW-NEXT: vzeroupper 1648; AVX512BW-NEXT: retq 1649; 1650; X86-SSE2-LABEL: load_sext_4i1_to_4i32: 1651; X86-SSE2: # %bb.0: # %entry 1652; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1653; X86-SSE2-NEXT: movzbl (%eax), %eax 1654; X86-SSE2-NEXT: movl %eax, %ecx 
1655; X86-SSE2-NEXT: shrb $3, %cl 1656; X86-SSE2-NEXT: movzbl %cl, %ecx 1657; X86-SSE2-NEXT: negl %ecx 1658; X86-SSE2-NEXT: movd %ecx, %xmm0 1659; X86-SSE2-NEXT: movl %eax, %ecx 1660; X86-SSE2-NEXT: shrb $2, %cl 1661; X86-SSE2-NEXT: movzbl %cl, %ecx 1662; X86-SSE2-NEXT: andl $1, %ecx 1663; X86-SSE2-NEXT: negl %ecx 1664; X86-SSE2-NEXT: movd %ecx, %xmm1 1665; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1666; X86-SSE2-NEXT: movzbl %al, %ecx 1667; X86-SSE2-NEXT: andl $1, %ecx 1668; X86-SSE2-NEXT: negl %ecx 1669; X86-SSE2-NEXT: movd %ecx, %xmm0 1670; X86-SSE2-NEXT: shrb %al 1671; X86-SSE2-NEXT: movzbl %al, %eax 1672; X86-SSE2-NEXT: andl $1, %eax 1673; X86-SSE2-NEXT: negl %eax 1674; X86-SSE2-NEXT: movd %eax, %xmm2 1675; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1676; X86-SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1677; X86-SSE2-NEXT: retl 1678; 1679; X86-SSE41-LABEL: load_sext_4i1_to_4i32: 1680; X86-SSE41: # %bb.0: # %entry 1681; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1682; X86-SSE41-NEXT: movzbl (%eax), %eax 1683; X86-SSE41-NEXT: movl %eax, %ecx 1684; X86-SSE41-NEXT: shrb %cl 1685; X86-SSE41-NEXT: movzbl %cl, %ecx 1686; X86-SSE41-NEXT: andl $1, %ecx 1687; X86-SSE41-NEXT: negl %ecx 1688; X86-SSE41-NEXT: movzbl %al, %edx 1689; X86-SSE41-NEXT: andl $1, %edx 1690; X86-SSE41-NEXT: negl %edx 1691; X86-SSE41-NEXT: movd %edx, %xmm0 1692; X86-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 1693; X86-SSE41-NEXT: movl %eax, %ecx 1694; X86-SSE41-NEXT: shrb $2, %cl 1695; X86-SSE41-NEXT: movzbl %cl, %ecx 1696; X86-SSE41-NEXT: andl $1, %ecx 1697; X86-SSE41-NEXT: negl %ecx 1698; X86-SSE41-NEXT: pinsrd $2, %ecx, %xmm0 1699; X86-SSE41-NEXT: shrb $3, %al 1700; X86-SSE41-NEXT: movzbl %al, %eax 1701; X86-SSE41-NEXT: negl %eax 1702; X86-SSE41-NEXT: pinsrd $3, %eax, %xmm0 1703; X86-SSE41-NEXT: retl 1704entry: 1705 %X = load <4 x i1>, ptr %ptr 1706 %Y = sext <4 x i1> %X to <4 x i32> 1707 ret <4 x i32> %Y 1708} 1709 1710define <4 x 
i32> @load_sext_4i8_to_4i32(ptr%ptr) { 1711; SSE2-LABEL: load_sext_4i8_to_4i32: 1712; SSE2: # %bb.0: # %entry 1713; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1714; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1715; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1716; SSE2-NEXT: psrad $24, %xmm0 1717; SSE2-NEXT: retq 1718; 1719; SSSE3-LABEL: load_sext_4i8_to_4i32: 1720; SSSE3: # %bb.0: # %entry 1721; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1722; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1723; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1724; SSSE3-NEXT: psrad $24, %xmm0 1725; SSSE3-NEXT: retq 1726; 1727; SSE41-LABEL: load_sext_4i8_to_4i32: 1728; SSE41: # %bb.0: # %entry 1729; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 1730; SSE41-NEXT: retq 1731; 1732; AVX-LABEL: load_sext_4i8_to_4i32: 1733; AVX: # %bb.0: # %entry 1734; AVX-NEXT: vpmovsxbd (%rdi), %xmm0 1735; AVX-NEXT: retq 1736; 1737; X86-SSE2-LABEL: load_sext_4i8_to_4i32: 1738; X86-SSE2: # %bb.0: # %entry 1739; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1740; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1741; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1742; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1743; X86-SSE2-NEXT: psrad $24, %xmm0 1744; X86-SSE2-NEXT: retl 1745; 1746; X86-SSE41-LABEL: load_sext_4i8_to_4i32: 1747; X86-SSE41: # %bb.0: # %entry 1748; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1749; X86-SSE41-NEXT: pmovsxbd (%eax), %xmm0 1750; X86-SSE41-NEXT: retl 1751entry: 1752 %X = load <4 x i8>, ptr %ptr 1753 %Y = sext <4 x i8> %X to <4 x i32> 1754 ret <4 x i32> %Y 1755} 1756 1757define <4 x i64> @load_sext_4i1_to_4i64(ptr%ptr) { 1758; SSE2-LABEL: load_sext_4i1_to_4i64: 1759; SSE2: # %bb.0: # %entry 1760; SSE2-NEXT: movzbl (%rdi), %eax 1761; SSE2-NEXT: movl %eax, %ecx 1762; SSE2-NEXT: shrb %cl 1763; SSE2-NEXT: andb $1, %cl 1764; 
SSE2-NEXT: movzbl %cl, %ecx 1765; SSE2-NEXT: movl %eax, %edx 1766; SSE2-NEXT: andb $1, %dl 1767; SSE2-NEXT: movzbl %dl, %edx 1768; SSE2-NEXT: movd %edx, %xmm1 1769; SSE2-NEXT: pinsrw $2, %ecx, %xmm1 1770; SSE2-NEXT: movl %eax, %ecx 1771; SSE2-NEXT: shrb $2, %cl 1772; SSE2-NEXT: andb $1, %cl 1773; SSE2-NEXT: movzbl %cl, %ecx 1774; SSE2-NEXT: pinsrw $4, %ecx, %xmm1 1775; SSE2-NEXT: shrb $3, %al 1776; SSE2-NEXT: movzbl %al, %eax 1777; SSE2-NEXT: pinsrw $6, %eax, %xmm1 1778; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] 1779; SSE2-NEXT: psllq $63, %xmm0 1780; SSE2-NEXT: psrad $31, %xmm0 1781; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1782; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] 1783; SSE2-NEXT: psllq $63, %xmm1 1784; SSE2-NEXT: psrad $31, %xmm1 1785; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1786; SSE2-NEXT: retq 1787; 1788; SSSE3-LABEL: load_sext_4i1_to_4i64: 1789; SSSE3: # %bb.0: # %entry 1790; SSSE3-NEXT: movzbl (%rdi), %eax 1791; SSSE3-NEXT: movl %eax, %ecx 1792; SSSE3-NEXT: shrb %cl 1793; SSSE3-NEXT: andb $1, %cl 1794; SSSE3-NEXT: movzbl %cl, %ecx 1795; SSSE3-NEXT: movl %eax, %edx 1796; SSSE3-NEXT: andb $1, %dl 1797; SSSE3-NEXT: movzbl %dl, %edx 1798; SSSE3-NEXT: movd %edx, %xmm1 1799; SSSE3-NEXT: pinsrw $2, %ecx, %xmm1 1800; SSSE3-NEXT: movl %eax, %ecx 1801; SSSE3-NEXT: shrb $2, %cl 1802; SSSE3-NEXT: andb $1, %cl 1803; SSSE3-NEXT: movzbl %cl, %ecx 1804; SSSE3-NEXT: pinsrw $4, %ecx, %xmm1 1805; SSSE3-NEXT: shrb $3, %al 1806; SSSE3-NEXT: movzbl %al, %eax 1807; SSSE3-NEXT: pinsrw $6, %eax, %xmm1 1808; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] 1809; SSSE3-NEXT: psllq $63, %xmm0 1810; SSSE3-NEXT: psrad $31, %xmm0 1811; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1812; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] 1813; SSSE3-NEXT: psllq $63, %xmm1 1814; SSSE3-NEXT: psrad $31, %xmm1 1815; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1816; SSSE3-NEXT: retq 1817; 1818; SSE41-LABEL: load_sext_4i1_to_4i64: 1819; SSE41: # %bb.0: # 
%entry 1820; SSE41-NEXT: movzbl (%rdi), %eax 1821; SSE41-NEXT: movl %eax, %ecx 1822; SSE41-NEXT: shrb %cl 1823; SSE41-NEXT: andb $1, %cl 1824; SSE41-NEXT: movzbl %cl, %ecx 1825; SSE41-NEXT: movl %eax, %edx 1826; SSE41-NEXT: andb $1, %dl 1827; SSE41-NEXT: movzbl %dl, %edx 1828; SSE41-NEXT: movd %edx, %xmm1 1829; SSE41-NEXT: pinsrb $4, %ecx, %xmm1 1830; SSE41-NEXT: movl %eax, %ecx 1831; SSE41-NEXT: shrb $2, %cl 1832; SSE41-NEXT: andb $1, %cl 1833; SSE41-NEXT: movzbl %cl, %ecx 1834; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1835; SSE41-NEXT: pinsrb $8, %ecx, %xmm1 1836; SSE41-NEXT: shrb $3, %al 1837; SSE41-NEXT: movzbl %al, %eax 1838; SSE41-NEXT: pinsrb $12, %eax, %xmm1 1839; SSE41-NEXT: psllq $63, %xmm0 1840; SSE41-NEXT: psrad $31, %xmm0 1841; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1842; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1843; SSE41-NEXT: psllq $63, %xmm1 1844; SSE41-NEXT: psrad $31, %xmm1 1845; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1846; SSE41-NEXT: retq 1847; 1848; AVX1-LABEL: load_sext_4i1_to_4i64: 1849; AVX1: # %bb.0: # %entry 1850; AVX1-NEXT: movzbl (%rdi), %eax 1851; AVX1-NEXT: movzbl %al, %ecx 1852; AVX1-NEXT: shrb %al 1853; AVX1-NEXT: movzbl %al, %eax 1854; AVX1-NEXT: andl $1, %eax 1855; AVX1-NEXT: negl %eax 1856; AVX1-NEXT: movl %ecx, %edx 1857; AVX1-NEXT: andl $1, %edx 1858; AVX1-NEXT: negl %edx 1859; AVX1-NEXT: vmovd %edx, %xmm0 1860; AVX1-NEXT: vpinsrd $1, %eax, %xmm0, %xmm0 1861; AVX1-NEXT: movl %ecx, %eax 1862; AVX1-NEXT: shrb $2, %al 1863; AVX1-NEXT: movzbl %al, %eax 1864; AVX1-NEXT: andl $1, %eax 1865; AVX1-NEXT: negl %eax 1866; AVX1-NEXT: vpinsrd $2, %eax, %xmm0, %xmm0 1867; AVX1-NEXT: shrb $3, %cl 1868; AVX1-NEXT: movzbl %cl, %eax 1869; AVX1-NEXT: negl %eax 1870; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1871; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1872; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 1873; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1874; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 
1875; AVX1-NEXT: retq 1876; 1877; AVX2-LABEL: load_sext_4i1_to_4i64: 1878; AVX2: # %bb.0: # %entry 1879; AVX2-NEXT: movzbl (%rdi), %eax 1880; AVX2-NEXT: movl %eax, %ecx 1881; AVX2-NEXT: shrb $3, %cl 1882; AVX2-NEXT: movzbl %cl, %ecx 1883; AVX2-NEXT: negq %rcx 1884; AVX2-NEXT: vmovq %rcx, %xmm0 1885; AVX2-NEXT: movzbl %al, %ecx 1886; AVX2-NEXT: shrb $2, %al 1887; AVX2-NEXT: movzbl %al, %eax 1888; AVX2-NEXT: andl $1, %eax 1889; AVX2-NEXT: negq %rax 1890; AVX2-NEXT: vmovq %rax, %xmm1 1891; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1892; AVX2-NEXT: movl %ecx, %eax 1893; AVX2-NEXT: andl $1, %eax 1894; AVX2-NEXT: negq %rax 1895; AVX2-NEXT: vmovq %rax, %xmm1 1896; AVX2-NEXT: shrb %cl 1897; AVX2-NEXT: movzbl %cl, %eax 1898; AVX2-NEXT: andl $1, %eax 1899; AVX2-NEXT: negq %rax 1900; AVX2-NEXT: vmovq %rax, %xmm2 1901; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1902; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1903; AVX2-NEXT: retq 1904; 1905; AVX512F-LABEL: load_sext_4i1_to_4i64: 1906; AVX512F: # %bb.0: # %entry 1907; AVX512F-NEXT: movzbl (%rdi), %eax 1908; AVX512F-NEXT: kmovw %eax, %k1 1909; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1910; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1911; AVX512F-NEXT: retq 1912; 1913; AVX512BW-LABEL: load_sext_4i1_to_4i64: 1914; AVX512BW: # %bb.0: # %entry 1915; AVX512BW-NEXT: movzbl (%rdi), %eax 1916; AVX512BW-NEXT: kmovd %eax, %k1 1917; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1918; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 1919; AVX512BW-NEXT: retq 1920; 1921; X86-SSE2-LABEL: load_sext_4i1_to_4i64: 1922; X86-SSE2: # %bb.0: # %entry 1923; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 1924; X86-SSE2-NEXT: movzbl (%eax), %eax 1925; X86-SSE2-NEXT: movl %eax, %ecx 1926; X86-SSE2-NEXT: shrb %cl 1927; X86-SSE2-NEXT: andb $1, %cl 1928; X86-SSE2-NEXT: movzbl %cl, %ecx 1929; X86-SSE2-NEXT: movl %eax, %edx 1930; X86-SSE2-NEXT: andb $1, %dl 1931; 
X86-SSE2-NEXT: movzbl %dl, %edx 1932; X86-SSE2-NEXT: movd %edx, %xmm1 1933; X86-SSE2-NEXT: pinsrw $2, %ecx, %xmm1 1934; X86-SSE2-NEXT: movl %eax, %ecx 1935; X86-SSE2-NEXT: shrb $2, %cl 1936; X86-SSE2-NEXT: andb $1, %cl 1937; X86-SSE2-NEXT: movzbl %cl, %ecx 1938; X86-SSE2-NEXT: pinsrw $4, %ecx, %xmm1 1939; X86-SSE2-NEXT: shrb $3, %al 1940; X86-SSE2-NEXT: movzbl %al, %eax 1941; X86-SSE2-NEXT: pinsrw $6, %eax, %xmm1 1942; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm1[0,1,1,3] 1943; X86-SSE2-NEXT: psllq $63, %xmm0 1944; X86-SSE2-NEXT: psrad $31, %xmm0 1945; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1946; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,1,3,3] 1947; X86-SSE2-NEXT: psllq $63, %xmm1 1948; X86-SSE2-NEXT: psrad $31, %xmm1 1949; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1950; X86-SSE2-NEXT: retl 1951; 1952; X86-SSE41-LABEL: load_sext_4i1_to_4i64: 1953; X86-SSE41: # %bb.0: # %entry 1954; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1955; X86-SSE41-NEXT: movzbl (%eax), %eax 1956; X86-SSE41-NEXT: movl %eax, %ecx 1957; X86-SSE41-NEXT: shrb %cl 1958; X86-SSE41-NEXT: andb $1, %cl 1959; X86-SSE41-NEXT: movzbl %cl, %ecx 1960; X86-SSE41-NEXT: movl %eax, %edx 1961; X86-SSE41-NEXT: andb $1, %dl 1962; X86-SSE41-NEXT: movzbl %dl, %edx 1963; X86-SSE41-NEXT: movd %edx, %xmm1 1964; X86-SSE41-NEXT: pinsrb $4, %ecx, %xmm1 1965; X86-SSE41-NEXT: movl %eax, %ecx 1966; X86-SSE41-NEXT: shrb $2, %cl 1967; X86-SSE41-NEXT: andb $1, %cl 1968; X86-SSE41-NEXT: movzbl %cl, %ecx 1969; X86-SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1970; X86-SSE41-NEXT: pinsrb $8, %ecx, %xmm1 1971; X86-SSE41-NEXT: shrb $3, %al 1972; X86-SSE41-NEXT: movzbl %al, %eax 1973; X86-SSE41-NEXT: pinsrb $12, %eax, %xmm1 1974; X86-SSE41-NEXT: psllq $63, %xmm0 1975; X86-SSE41-NEXT: psrad $31, %xmm0 1976; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1977; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1978; X86-SSE41-NEXT: psllq $63, %xmm1 1979; X86-SSE41-NEXT: psrad $31, 
%xmm1 1980; X86-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1981; X86-SSE41-NEXT: retl 1982entry: 1983 %X = load <4 x i1>, ptr %ptr 1984 %Y = sext <4 x i1> %X to <4 x i64> 1985 ret <4 x i64> %Y 1986} 1987 1988define <4 x i64> @load_sext_4i8_to_4i64(ptr%ptr) { 1989; SSE2-LABEL: load_sext_4i8_to_4i64: 1990; SSE2: # %bb.0: # %entry 1991; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1992; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1993; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1994; SSE2-NEXT: psrad $24, %xmm1 1995; SSE2-NEXT: pxor %xmm2, %xmm2 1996; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 1997; SSE2-NEXT: movdqa %xmm1, %xmm0 1998; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1999; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2000; SSE2-NEXT: retq 2001; 2002; SSSE3-LABEL: load_sext_4i8_to_4i64: 2003; SSSE3: # %bb.0: # %entry 2004; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2005; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2006; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2007; SSSE3-NEXT: psrad $24, %xmm1 2008; SSSE3-NEXT: pxor %xmm2, %xmm2 2009; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 2010; SSSE3-NEXT: movdqa %xmm1, %xmm0 2011; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2012; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2013; SSSE3-NEXT: retq 2014; 2015; SSE41-LABEL: load_sext_4i8_to_4i64: 2016; SSE41: # %bb.0: # %entry 2017; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 2018; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1 2019; SSE41-NEXT: retq 2020; 2021; AVX1-LABEL: load_sext_4i8_to_4i64: 2022; AVX1: # %bb.0: # %entry 2023; AVX1-NEXT: vpmovsxbq 2(%rdi), %xmm0 2024; AVX1-NEXT: vpmovsxbq (%rdi), %xmm1 2025; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2026; AVX1-NEXT: retq 2027; 2028; AVX2-LABEL: 
load_sext_4i8_to_4i64: 2029; AVX2: # %bb.0: # %entry 2030; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 2031; AVX2-NEXT: retq 2032; 2033; AVX512-LABEL: load_sext_4i8_to_4i64: 2034; AVX512: # %bb.0: # %entry 2035; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0 2036; AVX512-NEXT: retq 2037; 2038; X86-SSE2-LABEL: load_sext_4i8_to_4i64: 2039; X86-SSE2: # %bb.0: # %entry 2040; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2041; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2042; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2043; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2044; X86-SSE2-NEXT: psrad $24, %xmm1 2045; X86-SSE2-NEXT: pxor %xmm2, %xmm2 2046; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 2047; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2048; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2049; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2050; X86-SSE2-NEXT: retl 2051; 2052; X86-SSE41-LABEL: load_sext_4i8_to_4i64: 2053; X86-SSE41: # %bb.0: # %entry 2054; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2055; X86-SSE41-NEXT: pmovsxbq (%eax), %xmm0 2056; X86-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1 2057; X86-SSE41-NEXT: retl 2058entry: 2059 %X = load <4 x i8>, ptr %ptr 2060 %Y = sext <4 x i8> %X to <4 x i64> 2061 ret <4 x i64> %Y 2062} 2063 2064define <2 x i64> @load_sext_4i8_to_4i64_extract(ptr%ptr) { 2065; SSE2-LABEL: load_sext_4i8_to_4i64_extract: 2066; SSE2: # %bb.0: 2067; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2068; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2069; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2070; SSE2-NEXT: psrad $24, %xmm0 2071; SSE2-NEXT: pxor %xmm1, %xmm1 2072; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 2073; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2074; SSE2-NEXT: retq 2075; 2076; SSSE3-LABEL: load_sext_4i8_to_4i64_extract: 2077; SSSE3: # %bb.0: 
2078; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2079; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2080; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2081; SSSE3-NEXT: psrad $24, %xmm0 2082; SSSE3-NEXT: pxor %xmm1, %xmm1 2083; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 2084; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2085; SSSE3-NEXT: retq 2086; 2087; SSE41-LABEL: load_sext_4i8_to_4i64_extract: 2088; SSE41: # %bb.0: 2089; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm0 2090; SSE41-NEXT: retq 2091; 2092; AVX1-LABEL: load_sext_4i8_to_4i64_extract: 2093; AVX1: # %bb.0: 2094; AVX1-NEXT: vpmovsxbq 2(%rdi), %xmm0 2095; AVX1-NEXT: retq 2096; 2097; AVX2-LABEL: load_sext_4i8_to_4i64_extract: 2098; AVX2: # %bb.0: 2099; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 2100; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 2101; AVX2-NEXT: vzeroupper 2102; AVX2-NEXT: retq 2103; 2104; AVX512-LABEL: load_sext_4i8_to_4i64_extract: 2105; AVX512: # %bb.0: 2106; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0 2107; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 2108; AVX512-NEXT: vzeroupper 2109; AVX512-NEXT: retq 2110; 2111; X86-SSE2-LABEL: load_sext_4i8_to_4i64_extract: 2112; X86-SSE2: # %bb.0: 2113; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2114; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2115; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2116; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2117; X86-SSE2-NEXT: psrad $24, %xmm0 2118; X86-SSE2-NEXT: pxor %xmm1, %xmm1 2119; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 2120; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2121; X86-SSE2-NEXT: retl 2122; 2123; X86-SSE41-LABEL: load_sext_4i8_to_4i64_extract: 2124; X86-SSE41: # %bb.0: 2125; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2126; X86-SSE41-NEXT: pmovsxbq 2(%eax), %xmm0 2127; X86-SSE41-NEXT: retl 2128 %ld = load <4 x i8>, ptr %ptr 2129 %sext = sext <4 x i8> %ld to <4 x 
i64> 2130 %extract = shufflevector <4 x i64> %sext, <4 x i64> undef, <2 x i32> <i32 2, i32 3> 2131 ret <2 x i64> %extract 2132} 2133 2134define <8 x i16> @load_sext_8i1_to_8i16(ptr%ptr) { 2135; SSE-LABEL: load_sext_8i1_to_8i16: 2136; SSE: # %bb.0: # %entry 2137; SSE-NEXT: movzbl (%rdi), %eax 2138; SSE-NEXT: movd %eax, %xmm0 2139; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2140; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2141; SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128] 2142; SSE-NEXT: pand %xmm1, %xmm0 2143; SSE-NEXT: pcmpeqw %xmm1, %xmm0 2144; SSE-NEXT: retq 2145; 2146; AVX1-LABEL: load_sext_8i1_to_8i16: 2147; AVX1: # %bb.0: # %entry 2148; AVX1-NEXT: movzbl (%rdi), %eax 2149; AVX1-NEXT: vmovd %eax, %xmm0 2150; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2151; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2152; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128] 2153; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 2154; AVX1-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 2155; AVX1-NEXT: retq 2156; 2157; AVX2-LABEL: load_sext_8i1_to_8i16: 2158; AVX2: # %bb.0: # %entry 2159; AVX2-NEXT: vpbroadcastb (%rdi), %xmm0 2160; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128] 2161; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2162; AVX2-NEXT: vpcmpeqw %xmm1, %xmm0, %xmm0 2163; AVX2-NEXT: retq 2164; 2165; AVX512F-LABEL: load_sext_8i1_to_8i16: 2166; AVX512F: # %bb.0: # %entry 2167; AVX512F-NEXT: movzbl (%rdi), %eax 2168; AVX512F-NEXT: kmovw %eax, %k1 2169; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2170; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 2171; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 killed $ymm0 2172; AVX512F-NEXT: vzeroupper 2173; AVX512F-NEXT: retq 2174; 2175; AVX512BW-LABEL: load_sext_8i1_to_8i16: 2176; AVX512BW: # %bb.0: # %entry 2177; AVX512BW-NEXT: movzbl (%rdi), %eax 2178; AVX512BW-NEXT: kmovd %eax, %k0 2179; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 2180; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2181; 
AVX512BW-NEXT: vzeroupper 2182; AVX512BW-NEXT: retq 2183; 2184; X86-SSE-LABEL: load_sext_8i1_to_8i16: 2185; X86-SSE: # %bb.0: # %entry 2186; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2187; X86-SSE-NEXT: movzbl (%eax), %eax 2188; X86-SSE-NEXT: movd %eax, %xmm0 2189; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2190; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2191; X86-SSE-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128] 2192; X86-SSE-NEXT: pand %xmm1, %xmm0 2193; X86-SSE-NEXT: pcmpeqw %xmm1, %xmm0 2194; X86-SSE-NEXT: retl 2195entry: 2196 %X = load <8 x i1>, ptr %ptr 2197 %Y = sext <8 x i1> %X to <8 x i16> 2198 ret <8 x i16> %Y 2199} 2200 2201define <8 x i16> @load_sext_8i8_to_8i16(ptr%ptr) { 2202; SSE2-LABEL: load_sext_8i8_to_8i16: 2203; SSE2: # %bb.0: # %entry 2204; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2205; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2206; SSE2-NEXT: psraw $8, %xmm0 2207; SSE2-NEXT: retq 2208; 2209; SSSE3-LABEL: load_sext_8i8_to_8i16: 2210; SSSE3: # %bb.0: # %entry 2211; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2212; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2213; SSSE3-NEXT: psraw $8, %xmm0 2214; SSSE3-NEXT: retq 2215; 2216; SSE41-LABEL: load_sext_8i8_to_8i16: 2217; SSE41: # %bb.0: # %entry 2218; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 2219; SSE41-NEXT: retq 2220; 2221; AVX-LABEL: load_sext_8i8_to_8i16: 2222; AVX: # %bb.0: # %entry 2223; AVX-NEXT: vpmovsxbw (%rdi), %xmm0 2224; AVX-NEXT: retq 2225; 2226; X86-SSE2-LABEL: load_sext_8i8_to_8i16: 2227; X86-SSE2: # %bb.0: # %entry 2228; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2229; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2230; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2231; X86-SSE2-NEXT: psraw $8, %xmm0 2232; X86-SSE2-NEXT: retl 2233; 2234; X86-SSE41-LABEL: load_sext_8i8_to_8i16: 2235; X86-SSE41: # %bb.0: # %entry 2236; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), 
%eax 2237; X86-SSE41-NEXT: pmovsxbw (%eax), %xmm0 2238; X86-SSE41-NEXT: retl 2239entry: 2240 %X = load <8 x i8>, ptr %ptr 2241 %Y = sext <8 x i8> %X to <8 x i16> 2242 ret <8 x i16> %Y 2243} 2244 2245define <8 x i64> @load_sext_8i8_to_8i64(ptr%ptr) { 2246; SSE2-LABEL: load_sext_8i8_to_8i64: 2247; SSE2: # %bb.0: # %entry 2248; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2249; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2250; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2251; SSE2-NEXT: psrad $24, %xmm1 2252; SSE2-NEXT: pxor %xmm4, %xmm4 2253; SSE2-NEXT: pxor %xmm3, %xmm3 2254; SSE2-NEXT: pcmpgtd %xmm1, %xmm3 2255; SSE2-NEXT: movdqa %xmm1, %xmm0 2256; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 2257; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 2258; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2259; SSE2-NEXT: psrad $24, %xmm3 2260; SSE2-NEXT: pcmpgtd %xmm3, %xmm4 2261; SSE2-NEXT: movdqa %xmm3, %xmm2 2262; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 2263; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 2264; SSE2-NEXT: retq 2265; 2266; SSSE3-LABEL: load_sext_8i8_to_8i64: 2267; SSSE3: # %bb.0: # %entry 2268; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2269; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2270; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2271; SSSE3-NEXT: psrad $24, %xmm1 2272; SSSE3-NEXT: pxor %xmm4, %xmm4 2273; SSSE3-NEXT: pxor %xmm3, %xmm3 2274; SSSE3-NEXT: pcmpgtd %xmm1, %xmm3 2275; SSSE3-NEXT: movdqa %xmm1, %xmm0 2276; SSSE3-NEXT: punpckldq {{.*#+}} 
xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 2277; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 2278; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2279; SSSE3-NEXT: psrad $24, %xmm3 2280; SSSE3-NEXT: pcmpgtd %xmm3, %xmm4 2281; SSSE3-NEXT: movdqa %xmm3, %xmm2 2282; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 2283; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 2284; SSSE3-NEXT: retq 2285; 2286; SSE41-LABEL: load_sext_8i8_to_8i64: 2287; SSE41: # %bb.0: # %entry 2288; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 2289; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1 2290; SSE41-NEXT: pmovsxbq 4(%rdi), %xmm2 2291; SSE41-NEXT: pmovsxbq 6(%rdi), %xmm3 2292; SSE41-NEXT: retq 2293; 2294; AVX1-LABEL: load_sext_8i8_to_8i64: 2295; AVX1: # %bb.0: # %entry 2296; AVX1-NEXT: vpmovsxbq 6(%rdi), %xmm1 2297; AVX1-NEXT: vpmovsxbq 4(%rdi), %xmm2 2298; AVX1-NEXT: vpmovsxbq 2(%rdi), %xmm0 2299; AVX1-NEXT: vpmovsxbq (%rdi), %xmm3 2300; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm3, %ymm0 2301; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 2302; AVX1-NEXT: retq 2303; 2304; AVX2-LABEL: load_sext_8i8_to_8i64: 2305; AVX2: # %bb.0: # %entry 2306; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 2307; AVX2-NEXT: vpmovsxbq 4(%rdi), %ymm1 2308; AVX2-NEXT: retq 2309; 2310; AVX512-LABEL: load_sext_8i8_to_8i64: 2311; AVX512: # %bb.0: # %entry 2312; AVX512-NEXT: vpmovsxbq (%rdi), %zmm0 2313; AVX512-NEXT: retq 2314; 2315; X86-SSE2-LABEL: load_sext_8i8_to_8i64: 2316; X86-SSE2: # %bb.0: # %entry 2317; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2318; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2319; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2320; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2321; X86-SSE2-NEXT: psrad $24, %xmm1 2322; 
X86-SSE2-NEXT: pxor %xmm4, %xmm4 2323; X86-SSE2-NEXT: pxor %xmm3, %xmm3 2324; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm3 2325; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2326; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 2327; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm3[2],xmm1[3],xmm3[3] 2328; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2329; X86-SSE2-NEXT: psrad $24, %xmm3 2330; X86-SSE2-NEXT: pcmpgtd %xmm3, %xmm4 2331; X86-SSE2-NEXT: movdqa %xmm3, %xmm2 2332; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 2333; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 2334; X86-SSE2-NEXT: retl 2335; 2336; X86-SSE41-LABEL: load_sext_8i8_to_8i64: 2337; X86-SSE41: # %bb.0: # %entry 2338; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2339; X86-SSE41-NEXT: pmovsxbq (%eax), %xmm0 2340; X86-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1 2341; X86-SSE41-NEXT: pmovsxbq 4(%eax), %xmm2 2342; X86-SSE41-NEXT: pmovsxbq 6(%eax), %xmm3 2343; X86-SSE41-NEXT: retl 2344entry: 2345 %X = load <8 x i8>, ptr %ptr 2346 %Y = sext <8 x i8> %X to <8 x i64> 2347 ret <8 x i64> %Y 2348} 2349 2350define <8 x i32> @load_sext_8i1_to_8i32(ptr%ptr) { 2351; SSE-LABEL: load_sext_8i1_to_8i32: 2352; SSE: # %bb.0: # %entry 2353; SSE-NEXT: movzbl (%rdi), %eax 2354; SSE-NEXT: movd %eax, %xmm0 2355; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 2356; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8] 2357; SSE-NEXT: movdqa %xmm1, %xmm0 2358; SSE-NEXT: pand %xmm2, %xmm0 2359; SSE-NEXT: pcmpeqd %xmm2, %xmm0 2360; SSE-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128] 2361; SSE-NEXT: pand %xmm2, %xmm1 2362; SSE-NEXT: pcmpeqd %xmm2, %xmm1 2363; SSE-NEXT: retq 2364; 2365; AVX1-LABEL: load_sext_8i1_to_8i32: 2366; AVX1: # %bb.0: # %entry 2367; AVX1-NEXT: movzbl (%rdi), %eax 2368; AVX1-NEXT: vmovd %eax, %xmm0 2369; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2370; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 
2371; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 2372; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2373; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2374; AVX1-NEXT: vpcmpeqd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2375; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2376; AVX1-NEXT: retq 2377; 2378; AVX2-LABEL: load_sext_8i1_to_8i32: 2379; AVX2: # %bb.0: # %entry 2380; AVX2-NEXT: vpbroadcastb (%rdi), %ymm0 2381; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128] 2382; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 2383; AVX2-NEXT: vpcmpeqd %ymm1, %ymm0, %ymm0 2384; AVX2-NEXT: retq 2385; 2386; AVX512F-LABEL: load_sext_8i1_to_8i32: 2387; AVX512F: # %bb.0: # %entry 2388; AVX512F-NEXT: movzbl (%rdi), %eax 2389; AVX512F-NEXT: kmovw %eax, %k1 2390; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2391; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2392; AVX512F-NEXT: retq 2393; 2394; AVX512BW-LABEL: load_sext_8i1_to_8i32: 2395; AVX512BW: # %bb.0: # %entry 2396; AVX512BW-NEXT: movzbl (%rdi), %eax 2397; AVX512BW-NEXT: kmovd %eax, %k1 2398; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2399; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2400; AVX512BW-NEXT: retq 2401; 2402; X86-SSE-LABEL: load_sext_8i1_to_8i32: 2403; X86-SSE: # %bb.0: # %entry 2404; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2405; X86-SSE-NEXT: movzbl (%eax), %eax 2406; X86-SSE-NEXT: movd %eax, %xmm0 2407; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 2408; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8] 2409; X86-SSE-NEXT: movdqa %xmm1, %xmm0 2410; X86-SSE-NEXT: pand %xmm2, %xmm0 2411; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm0 2412; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [16,32,64,128] 2413; X86-SSE-NEXT: pand %xmm2, %xmm1 2414; X86-SSE-NEXT: pcmpeqd %xmm2, %xmm1 2415; X86-SSE-NEXT: retl 2416entry: 2417 %X = load <8 x i1>, ptr %ptr 2418 %Y = sext <8 x i1> %X to <8 x i32> 2419 ret <8 x i32> %Y 2420} 2421 2422define <8 x 
i32> @load_sext_8i8_to_8i32(ptr%ptr) { 2423; SSE2-LABEL: load_sext_8i8_to_8i32: 2424; SSE2: # %bb.0: # %entry 2425; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2426; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2427; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2428; SSE2-NEXT: psrad $24, %xmm0 2429; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2430; SSE2-NEXT: psrad $24, %xmm1 2431; SSE2-NEXT: retq 2432; 2433; SSSE3-LABEL: load_sext_8i8_to_8i32: 2434; SSSE3: # %bb.0: # %entry 2435; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2436; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2437; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2438; SSSE3-NEXT: psrad $24, %xmm0 2439; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2440; SSSE3-NEXT: psrad $24, %xmm1 2441; SSSE3-NEXT: retq 2442; 2443; SSE41-LABEL: load_sext_8i8_to_8i32: 2444; SSE41: # %bb.0: # %entry 2445; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 2446; SSE41-NEXT: pmovsxbd 4(%rdi), %xmm1 2447; SSE41-NEXT: retq 2448; 2449; AVX1-LABEL: load_sext_8i8_to_8i32: 2450; AVX1: # %bb.0: # %entry 2451; AVX1-NEXT: vpmovsxbd 4(%rdi), %xmm0 2452; AVX1-NEXT: vpmovsxbd (%rdi), %xmm1 2453; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2454; AVX1-NEXT: retq 2455; 2456; AVX2-LABEL: load_sext_8i8_to_8i32: 2457; AVX2: # %bb.0: # %entry 2458; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0 2459; AVX2-NEXT: retq 2460; 2461; AVX512-LABEL: load_sext_8i8_to_8i32: 2462; AVX512: # %bb.0: # %entry 2463; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0 2464; AVX512-NEXT: retq 2465; 2466; X86-SSE2-LABEL: load_sext_8i8_to_8i32: 2467; X86-SSE2: # %bb.0: # %entry 2468; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), 
%eax 2469; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2470; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2471; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2472; X86-SSE2-NEXT: psrad $24, %xmm0 2473; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2474; X86-SSE2-NEXT: psrad $24, %xmm1 2475; X86-SSE2-NEXT: retl 2476; 2477; X86-SSE41-LABEL: load_sext_8i8_to_8i32: 2478; X86-SSE41: # %bb.0: # %entry 2479; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2480; X86-SSE41-NEXT: pmovsxbd (%eax), %xmm0 2481; X86-SSE41-NEXT: pmovsxbd 4(%eax), %xmm1 2482; X86-SSE41-NEXT: retl 2483entry: 2484 %X = load <8 x i8>, ptr %ptr 2485 %Y = sext <8 x i8> %X to <8 x i32> 2486 ret <8 x i32> %Y 2487} 2488 2489define <16 x i8> @load_sext_16i1_to_16i8(ptr%ptr) nounwind readnone { 2490; SSE2-LABEL: load_sext_16i1_to_16i8: 2491; SSE2: # %bb.0: # %entry 2492; SSE2-NEXT: movzwl (%rdi), %eax 2493; SSE2-NEXT: movd %eax, %xmm0 2494; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2495; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7] 2496; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2497; SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2498; SSE2-NEXT: pand %xmm1, %xmm0 2499; SSE2-NEXT: pcmpeqb %xmm1, %xmm0 2500; SSE2-NEXT: retq 2501; 2502; SSSE3-LABEL: load_sext_16i1_to_16i8: 2503; SSSE3: # %bb.0: # %entry 2504; SSSE3-NEXT: movzwl (%rdi), %eax 2505; SSSE3-NEXT: movd %eax, %xmm0 2506; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2507; SSSE3-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2508; SSSE3-NEXT: pand %xmm1, %xmm0 2509; SSSE3-NEXT: pcmpeqb %xmm1, %xmm0 2510; SSSE3-NEXT: retq 2511; 2512; SSE41-LABEL: load_sext_16i1_to_16i8: 2513; SSE41: # %bb.0: # %entry 2514; SSE41-NEXT: 
movzwl (%rdi), %eax 2515; SSE41-NEXT: movd %eax, %xmm0 2516; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2517; SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2518; SSE41-NEXT: pand %xmm1, %xmm0 2519; SSE41-NEXT: pcmpeqb %xmm1, %xmm0 2520; SSE41-NEXT: retq 2521; 2522; AVX1-LABEL: load_sext_16i1_to_16i8: 2523; AVX1: # %bb.0: # %entry 2524; AVX1-NEXT: movzwl (%rdi), %eax 2525; AVX1-NEXT: vmovd %eax, %xmm0 2526; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2527; AVX1-NEXT: vmovddup {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] 2528; AVX1-NEXT: # xmm1 = mem[0,0] 2529; AVX1-NEXT: vpand %xmm1, %xmm0, %xmm0 2530; AVX1-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 2531; AVX1-NEXT: retq 2532; 2533; AVX2-LABEL: load_sext_16i1_to_16i8: 2534; AVX2: # %bb.0: # %entry 2535; AVX2-NEXT: movzwl (%rdi), %eax 2536; AVX2-NEXT: vmovd %eax, %xmm0 2537; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2538; AVX2-NEXT: vpbroadcastq {{.*#+}} xmm1 = [9241421688590303745,9241421688590303745] 2539; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0 2540; AVX2-NEXT: vpcmpeqb %xmm1, %xmm0, %xmm0 2541; AVX2-NEXT: retq 2542; 2543; AVX512F-LABEL: load_sext_16i1_to_16i8: 2544; AVX512F: # %bb.0: # %entry 2545; AVX512F-NEXT: kmovw (%rdi), %k1 2546; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2547; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 2548; AVX512F-NEXT: vzeroupper 2549; AVX512F-NEXT: retq 2550; 2551; AVX512BW-LABEL: load_sext_16i1_to_16i8: 2552; AVX512BW: # %bb.0: # %entry 2553; AVX512BW-NEXT: kmovw (%rdi), %k0 2554; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 2555; AVX512BW-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 2556; AVX512BW-NEXT: vzeroupper 2557; AVX512BW-NEXT: retq 2558; 2559; X86-SSE2-LABEL: load_sext_16i1_to_16i8: 2560; X86-SSE2: # %bb.0: # %entry 2561; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2562; X86-SSE2-NEXT: movzwl (%eax), %eax 2563; X86-SSE2-NEXT: movd %eax, %xmm0 2564; 
X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2565; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,1,1,4,5,6,7] 2566; X86-SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2567; X86-SSE2-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2568; X86-SSE2-NEXT: pand %xmm1, %xmm0 2569; X86-SSE2-NEXT: pcmpeqb %xmm1, %xmm0 2570; X86-SSE2-NEXT: retl 2571; 2572; X86-SSE41-LABEL: load_sext_16i1_to_16i8: 2573; X86-SSE41: # %bb.0: # %entry 2574; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2575; X86-SSE41-NEXT: movzwl (%eax), %eax 2576; X86-SSE41-NEXT: movd %eax, %xmm0 2577; X86-SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1] 2578; X86-SSE41-NEXT: movdqa {{.*#+}} xmm1 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2579; X86-SSE41-NEXT: pand %xmm1, %xmm0 2580; X86-SSE41-NEXT: pcmpeqb %xmm1, %xmm0 2581; X86-SSE41-NEXT: retl 2582entry: 2583 %X = load <16 x i1>, ptr %ptr 2584 %Y = sext <16 x i1> %X to <16 x i8> 2585 ret <16 x i8> %Y 2586} 2587 2588define <16 x i16> @load_sext_16i1_to_16i16(ptr%ptr) { 2589; SSE-LABEL: load_sext_16i1_to_16i16: 2590; SSE: # %bb.0: # %entry 2591; SSE-NEXT: movzwl (%rdi), %eax 2592; SSE-NEXT: movd %eax, %xmm0 2593; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2594; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 2595; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128] 2596; SSE-NEXT: movdqa %xmm1, %xmm0 2597; SSE-NEXT: pand %xmm2, %xmm0 2598; SSE-NEXT: pcmpeqw %xmm2, %xmm0 2599; SSE-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768] 2600; SSE-NEXT: pand %xmm2, %xmm1 2601; SSE-NEXT: pcmpeqw %xmm2, %xmm1 2602; SSE-NEXT: retq 2603; 2604; AVX1-LABEL: load_sext_16i1_to_16i16: 2605; AVX1: # %bb.0: # %entry 2606; AVX1-NEXT: movzwl (%rdi), %eax 2607; AVX1-NEXT: vmovd %eax, %xmm0 2608; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2609; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0] 2610; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm0, %ymm0 
2611; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 2612; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm1 2613; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2614; AVX1-NEXT: vpcmpeqw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 2615; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2616; AVX1-NEXT: retq 2617; 2618; AVX2-LABEL: load_sext_16i1_to_16i16: 2619; AVX2: # %bb.0: # %entry 2620; AVX2-NEXT: vpbroadcastw (%rdi), %ymm0 2621; AVX2-NEXT: vmovdqa {{.*#+}} ymm1 = [1,2,4,8,16,32,64,128,256,512,1024,2048,4096,8192,16384,32768] 2622; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 2623; AVX2-NEXT: vpcmpeqw %ymm1, %ymm0, %ymm0 2624; AVX2-NEXT: retq 2625; 2626; AVX512F-LABEL: load_sext_16i1_to_16i16: 2627; AVX512F: # %bb.0: # %entry 2628; AVX512F-NEXT: kmovw (%rdi), %k1 2629; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2630; AVX512F-NEXT: vpmovdw %zmm0, %ymm0 2631; AVX512F-NEXT: retq 2632; 2633; AVX512BW-LABEL: load_sext_16i1_to_16i16: 2634; AVX512BW: # %bb.0: # %entry 2635; AVX512BW-NEXT: kmovw (%rdi), %k0 2636; AVX512BW-NEXT: vpmovm2w %k0, %zmm0 2637; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2638; AVX512BW-NEXT: retq 2639; 2640; X86-SSE-LABEL: load_sext_16i1_to_16i16: 2641; X86-SSE: # %bb.0: # %entry 2642; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2643; X86-SSE-NEXT: movzwl (%eax), %eax 2644; X86-SSE-NEXT: movd %eax, %xmm0 2645; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7] 2646; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[0,0,0,0] 2647; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128] 2648; X86-SSE-NEXT: movdqa %xmm1, %xmm0 2649; X86-SSE-NEXT: pand %xmm2, %xmm0 2650; X86-SSE-NEXT: pcmpeqw %xmm2, %xmm0 2651; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [256,512,1024,2048,4096,8192,16384,32768] 2652; X86-SSE-NEXT: pand %xmm2, %xmm1 2653; X86-SSE-NEXT: pcmpeqw %xmm2, %xmm1 2654; X86-SSE-NEXT: retl 2655entry: 2656 %X = load <16 x i1>, ptr %ptr 2657 %Y = sext <16 x i1> %X to <16 x i16> 2658 ret <16 x i16> 
%Y 2659} 2660 2661define <32 x i8> @load_sext_32i1_to_32i8(ptr%ptr) nounwind readnone { 2662; SSE-LABEL: load_sext_32i1_to_32i8: 2663; SSE: # %bb.0: # %entry 2664; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2665; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2666; SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7] 2667; SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2668; SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2669; SSE-NEXT: pand %xmm2, %xmm0 2670; SSE-NEXT: pcmpeqb %xmm2, %xmm0 2671; SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7] 2672; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 2673; SSE-NEXT: pand %xmm2, %xmm1 2674; SSE-NEXT: pcmpeqb %xmm2, %xmm1 2675; SSE-NEXT: retq 2676; 2677; AVX1-LABEL: load_sext_32i1_to_32i8: 2678; AVX1: # %bb.0: # %entry 2679; AVX1-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2680; AVX1-NEXT: vpunpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2681; AVX1-NEXT: vpshuflw {{.*#+}} xmm1 = xmm0[0,0,1,1,4,5,6,7] 2682; AVX1-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,2,3,3,4,5,6,7] 2683; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2684; AVX1-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,1,4,4,5,5] 2685; AVX1-NEXT: vandps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 2686; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm1 2687; AVX1-NEXT: vmovddup {{.*#+}} xmm2 = [9241421688590303745,9241421688590303745] 2688; AVX1-NEXT: # xmm2 = mem[0,0] 2689; AVX1-NEXT: vpcmpeqb %xmm2, %xmm1, %xmm1 2690; AVX1-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm0 2691; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2692; AVX1-NEXT: retq 2693; 2694; AVX2-LABEL: load_sext_32i1_to_32i8: 2695; AVX2: # %bb.0: # %entry 2696; AVX2-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2697; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1] 2698; AVX2-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[0,0,0,0,0,0,0,0,1,1,1,1,1,1,1,1,18,18,18,18,18,18,18,18,19,19,19,19,19,19,19,19] 2699; AVX2-NEXT: vpbroadcastq 
{{.*#+}} ymm1 = [9241421688590303745,9241421688590303745,9241421688590303745,9241421688590303745] 2700; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0 2701; AVX2-NEXT: vpcmpeqb %ymm1, %ymm0, %ymm0 2702; AVX2-NEXT: retq 2703; 2704; AVX512F-LABEL: load_sext_32i1_to_32i8: 2705; AVX512F: # %bb.0: # %entry 2706; AVX512F-NEXT: kmovw (%rdi), %k1 2707; AVX512F-NEXT: kmovw 2(%rdi), %k2 2708; AVX512F-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2709; AVX512F-NEXT: vpmovdb %zmm0, %xmm0 2710; AVX512F-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} 2711; AVX512F-NEXT: vpmovdb %zmm1, %xmm1 2712; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 2713; AVX512F-NEXT: retq 2714; 2715; AVX512BW-LABEL: load_sext_32i1_to_32i8: 2716; AVX512BW: # %bb.0: # %entry 2717; AVX512BW-NEXT: kmovd (%rdi), %k0 2718; AVX512BW-NEXT: vpmovm2b %k0, %zmm0 2719; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 2720; AVX512BW-NEXT: retq 2721; 2722; X86-SSE-LABEL: load_sext_32i1_to_32i8: 2723; X86-SSE: # %bb.0: # %entry 2724; X86-SSE-NEXT: movl {{[0-9]+}}(%esp), %eax 2725; X86-SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2726; X86-SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2727; X86-SSE-NEXT: pshuflw {{.*#+}} xmm0 = xmm1[0,0,1,1,4,5,6,7] 2728; X86-SSE-NEXT: pshufd {{.*#+}} xmm0 = xmm0[0,0,1,1] 2729; X86-SSE-NEXT: movdqa {{.*#+}} xmm2 = [1,2,4,8,16,32,64,128,1,2,4,8,16,32,64,128] 2730; X86-SSE-NEXT: pand %xmm2, %xmm0 2731; X86-SSE-NEXT: pcmpeqb %xmm2, %xmm0 2732; X86-SSE-NEXT: pshuflw {{.*#+}} xmm1 = xmm1[2,2,3,3,4,5,6,7] 2733; X86-SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm1[0,0,1,1] 2734; X86-SSE-NEXT: pand %xmm2, %xmm1 2735; X86-SSE-NEXT: pcmpeqb %xmm2, %xmm1 2736; X86-SSE-NEXT: retl 2737entry: 2738 %X = load <32 x i1>, ptr %ptr 2739 %Y = sext <32 x i1> %X to <32 x i8> 2740 ret <32 x i8> %Y 2741} 2742 2743define <16 x i16> @load_sext_16i8_to_16i16(ptr%ptr) { 2744; SSE2-LABEL: load_sext_16i8_to_16i16: 2745; SSE2: # %bb.0: # %entry 2746; SSE2-NEXT: movdqa 
(%rdi), %xmm1 2747; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2748; SSE2-NEXT: psraw $8, %xmm0 2749; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2750; SSE2-NEXT: psraw $8, %xmm1 2751; SSE2-NEXT: retq 2752; 2753; SSSE3-LABEL: load_sext_16i8_to_16i16: 2754; SSSE3: # %bb.0: # %entry 2755; SSSE3-NEXT: movdqa (%rdi), %xmm1 2756; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2757; SSSE3-NEXT: psraw $8, %xmm0 2758; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2759; SSSE3-NEXT: psraw $8, %xmm1 2760; SSSE3-NEXT: retq 2761; 2762; SSE41-LABEL: load_sext_16i8_to_16i16: 2763; SSE41: # %bb.0: # %entry 2764; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 2765; SSE41-NEXT: pmovsxbw 8(%rdi), %xmm1 2766; SSE41-NEXT: retq 2767; 2768; AVX1-LABEL: load_sext_16i8_to_16i16: 2769; AVX1: # %bb.0: # %entry 2770; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm0 2771; AVX1-NEXT: vpmovsxbw (%rdi), %xmm1 2772; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2773; AVX1-NEXT: retq 2774; 2775; AVX2-LABEL: load_sext_16i8_to_16i16: 2776; AVX2: # %bb.0: # %entry 2777; AVX2-NEXT: vpmovsxbw (%rdi), %ymm0 2778; AVX2-NEXT: retq 2779; 2780; AVX512-LABEL: load_sext_16i8_to_16i16: 2781; AVX512: # %bb.0: # %entry 2782; AVX512-NEXT: vpmovsxbw (%rdi), %ymm0 2783; AVX512-NEXT: retq 2784; 2785; X86-SSE2-LABEL: load_sext_16i8_to_16i16: 2786; X86-SSE2: # %bb.0: # %entry 2787; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2788; X86-SSE2-NEXT: movdqa (%eax), %xmm1 2789; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 2790; X86-SSE2-NEXT: psraw $8, %xmm0 2791; X86-SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = 
xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 2792; X86-SSE2-NEXT: psraw $8, %xmm1 2793; X86-SSE2-NEXT: retl 2794; 2795; X86-SSE41-LABEL: load_sext_16i8_to_16i16: 2796; X86-SSE41: # %bb.0: # %entry 2797; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2798; X86-SSE41-NEXT: pmovsxbw (%eax), %xmm0 2799; X86-SSE41-NEXT: pmovsxbw 8(%eax), %xmm1 2800; X86-SSE41-NEXT: retl 2801entry: 2802 %X = load <16 x i8>, ptr %ptr 2803 %Y = sext <16 x i8> %X to <16 x i16> 2804 ret <16 x i16> %Y 2805} 2806 2807define <2 x i64> @load_sext_2i16_to_2i64(ptr%ptr) { 2808; SSE2-LABEL: load_sext_2i16_to_2i64: 2809; SSE2: # %bb.0: # %entry 2810; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2811; SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 2812; SSE2-NEXT: pxor %xmm1, %xmm1 2813; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 2814; SSE2-NEXT: psrad $16, %xmm0 2815; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2816; SSE2-NEXT: retq 2817; 2818; SSSE3-LABEL: load_sext_2i16_to_2i64: 2819; SSSE3: # %bb.0: # %entry 2820; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2821; SSSE3-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 2822; SSSE3-NEXT: pxor %xmm1, %xmm1 2823; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 2824; SSSE3-NEXT: psrad $16, %xmm0 2825; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2826; SSSE3-NEXT: retq 2827; 2828; SSE41-LABEL: load_sext_2i16_to_2i64: 2829; SSE41: # %bb.0: # %entry 2830; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 2831; SSE41-NEXT: retq 2832; 2833; AVX-LABEL: load_sext_2i16_to_2i64: 2834; AVX: # %bb.0: # %entry 2835; AVX-NEXT: vpmovsxwq (%rdi), %xmm0 2836; AVX-NEXT: retq 2837; 2838; X86-SSE2-LABEL: load_sext_2i16_to_2i64: 2839; X86-SSE2: # %bb.0: # %entry 2840; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2841; X86-SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2842; X86-SSE2-NEXT: pshuflw {{.*#+}} xmm0 = xmm0[0,0,2,1,4,5,6,7] 2843; X86-SSE2-NEXT: pxor %xmm1, %xmm1 2844; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 2845; 
X86-SSE2-NEXT: psrad $16, %xmm0 2846; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 2847; X86-SSE2-NEXT: retl 2848; 2849; X86-SSE41-LABEL: load_sext_2i16_to_2i64: 2850; X86-SSE41: # %bb.0: # %entry 2851; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2852; X86-SSE41-NEXT: pmovsxwq (%eax), %xmm0 2853; X86-SSE41-NEXT: retl 2854entry: 2855 %X = load <2 x i16>, ptr %ptr 2856 %Y = sext <2 x i16> %X to <2 x i64> 2857 ret <2 x i64> %Y 2858} 2859 2860define <4 x i32> @load_sext_4i16_to_4i32(ptr%ptr) { 2861; SSE2-LABEL: load_sext_4i16_to_4i32: 2862; SSE2: # %bb.0: # %entry 2863; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2864; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2865; SSE2-NEXT: psrad $16, %xmm0 2866; SSE2-NEXT: retq 2867; 2868; SSSE3-LABEL: load_sext_4i16_to_4i32: 2869; SSSE3: # %bb.0: # %entry 2870; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2871; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2872; SSSE3-NEXT: psrad $16, %xmm0 2873; SSSE3-NEXT: retq 2874; 2875; SSE41-LABEL: load_sext_4i16_to_4i32: 2876; SSE41: # %bb.0: # %entry 2877; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 2878; SSE41-NEXT: retq 2879; 2880; AVX-LABEL: load_sext_4i16_to_4i32: 2881; AVX: # %bb.0: # %entry 2882; AVX-NEXT: vpmovsxwd (%rdi), %xmm0 2883; AVX-NEXT: retq 2884; 2885; X86-SSE2-LABEL: load_sext_4i16_to_4i32: 2886; X86-SSE2: # %bb.0: # %entry 2887; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2888; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2889; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2890; X86-SSE2-NEXT: psrad $16, %xmm0 2891; X86-SSE2-NEXT: retl 2892; 2893; X86-SSE41-LABEL: load_sext_4i16_to_4i32: 2894; X86-SSE41: # %bb.0: # %entry 2895; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2896; X86-SSE41-NEXT: pmovsxwd (%eax), %xmm0 2897; X86-SSE41-NEXT: retl 2898entry: 2899 %X = load <4 x i16>, ptr %ptr 2900 %Y = sext <4 x i16> %X to <4 x i32> 2901 ret <4 x i32> %Y 2902} 2903 2904define <4 x i64> 
@load_sext_4i16_to_4i64(ptr%ptr) { 2905; SSE2-LABEL: load_sext_4i16_to_4i64: 2906; SSE2: # %bb.0: # %entry 2907; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2908; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2909; SSE2-NEXT: psrad $16, %xmm1 2910; SSE2-NEXT: pxor %xmm2, %xmm2 2911; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 2912; SSE2-NEXT: movdqa %xmm1, %xmm0 2913; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2914; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2915; SSE2-NEXT: retq 2916; 2917; SSSE3-LABEL: load_sext_4i16_to_4i64: 2918; SSSE3: # %bb.0: # %entry 2919; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2920; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2921; SSSE3-NEXT: psrad $16, %xmm1 2922; SSSE3-NEXT: pxor %xmm2, %xmm2 2923; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 2924; SSSE3-NEXT: movdqa %xmm1, %xmm0 2925; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2926; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2927; SSSE3-NEXT: retq 2928; 2929; SSE41-LABEL: load_sext_4i16_to_4i64: 2930; SSE41: # %bb.0: # %entry 2931; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 2932; SSE41-NEXT: pmovsxwq 4(%rdi), %xmm1 2933; SSE41-NEXT: retq 2934; 2935; AVX1-LABEL: load_sext_4i16_to_4i64: 2936; AVX1: # %bb.0: # %entry 2937; AVX1-NEXT: vpmovsxwq 4(%rdi), %xmm0 2938; AVX1-NEXT: vpmovsxwq (%rdi), %xmm1 2939; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2940; AVX1-NEXT: retq 2941; 2942; AVX2-LABEL: load_sext_4i16_to_4i64: 2943; AVX2: # %bb.0: # %entry 2944; AVX2-NEXT: vpmovsxwq (%rdi), %ymm0 2945; AVX2-NEXT: retq 2946; 2947; AVX512-LABEL: load_sext_4i16_to_4i64: 2948; AVX512: # %bb.0: # %entry 2949; AVX512-NEXT: vpmovsxwq (%rdi), %ymm0 2950; AVX512-NEXT: retq 2951; 2952; X86-SSE2-LABEL: load_sext_4i16_to_4i64: 2953; X86-SSE2: # %bb.0: # %entry 2954; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 2955; 
X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2956; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2957; X86-SSE2-NEXT: psrad $16, %xmm1 2958; X86-SSE2-NEXT: pxor %xmm2, %xmm2 2959; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 2960; X86-SSE2-NEXT: movdqa %xmm1, %xmm0 2961; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 2962; X86-SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2963; X86-SSE2-NEXT: retl 2964; 2965; X86-SSE41-LABEL: load_sext_4i16_to_4i64: 2966; X86-SSE41: # %bb.0: # %entry 2967; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2968; X86-SSE41-NEXT: pmovsxwq (%eax), %xmm0 2969; X86-SSE41-NEXT: pmovsxwq 4(%eax), %xmm1 2970; X86-SSE41-NEXT: retl 2971entry: 2972 %X = load <4 x i16>, ptr %ptr 2973 %Y = sext <4 x i16> %X to <4 x i64> 2974 ret <4 x i64> %Y 2975} 2976 2977define <8 x i32> @load_sext_8i16_to_8i32(ptr%ptr) { 2978; SSE2-LABEL: load_sext_8i16_to_8i32: 2979; SSE2: # %bb.0: # %entry 2980; SSE2-NEXT: movdqa (%rdi), %xmm1 2981; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2982; SSE2-NEXT: psrad $16, %xmm0 2983; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2984; SSE2-NEXT: psrad $16, %xmm1 2985; SSE2-NEXT: retq 2986; 2987; SSSE3-LABEL: load_sext_8i16_to_8i32: 2988; SSSE3: # %bb.0: # %entry 2989; SSSE3-NEXT: movdqa (%rdi), %xmm1 2990; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2991; SSSE3-NEXT: psrad $16, %xmm0 2992; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 2993; SSSE3-NEXT: psrad $16, %xmm1 2994; SSSE3-NEXT: retq 2995; 2996; SSE41-LABEL: load_sext_8i16_to_8i32: 2997; SSE41: # %bb.0: # %entry 2998; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 2999; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1 3000; SSE41-NEXT: retq 3001; 3002; AVX1-LABEL: load_sext_8i16_to_8i32: 3003; AVX1: # %bb.0: # %entry 3004; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm0 
3005; AVX1-NEXT: vpmovsxwd (%rdi), %xmm1 3006; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3007; AVX1-NEXT: retq 3008; 3009; AVX2-LABEL: load_sext_8i16_to_8i32: 3010; AVX2: # %bb.0: # %entry 3011; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0 3012; AVX2-NEXT: retq 3013; 3014; AVX512-LABEL: load_sext_8i16_to_8i32: 3015; AVX512: # %bb.0: # %entry 3016; AVX512-NEXT: vpmovsxwd (%rdi), %ymm0 3017; AVX512-NEXT: retq 3018; 3019; X86-SSE2-LABEL: load_sext_8i16_to_8i32: 3020; X86-SSE2: # %bb.0: # %entry 3021; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 3022; X86-SSE2-NEXT: movdqa (%eax), %xmm1 3023; X86-SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 3024; X86-SSE2-NEXT: psrad $16, %xmm0 3025; X86-SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7] 3026; X86-SSE2-NEXT: psrad $16, %xmm1 3027; X86-SSE2-NEXT: retl 3028; 3029; X86-SSE41-LABEL: load_sext_8i16_to_8i32: 3030; X86-SSE41: # %bb.0: # %entry 3031; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3032; X86-SSE41-NEXT: pmovsxwd (%eax), %xmm0 3033; X86-SSE41-NEXT: pmovsxwd 8(%eax), %xmm1 3034; X86-SSE41-NEXT: retl 3035entry: 3036 %X = load <8 x i16>, ptr %ptr 3037 %Y = sext <8 x i16> %X to <8 x i32> 3038 ret <8 x i32> %Y 3039} 3040 3041define <2 x i64> @load_sext_2i32_to_2i64(ptr%ptr) { 3042; SSE2-LABEL: load_sext_2i32_to_2i64: 3043; SSE2: # %bb.0: # %entry 3044; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3045; SSE2-NEXT: pxor %xmm1, %xmm1 3046; SSE2-NEXT: pcmpgtd %xmm0, %xmm1 3047; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3048; SSE2-NEXT: retq 3049; 3050; SSSE3-LABEL: load_sext_2i32_to_2i64: 3051; SSSE3: # %bb.0: # %entry 3052; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3053; SSSE3-NEXT: pxor %xmm1, %xmm1 3054; SSSE3-NEXT: pcmpgtd %xmm0, %xmm1 3055; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3056; SSSE3-NEXT: retq 3057; 3058; SSE41-LABEL: load_sext_2i32_to_2i64: 3059; SSE41: # %bb.0: # %entry 3060; SSE41-NEXT: 
pmovsxdq (%rdi), %xmm0 3061; SSE41-NEXT: retq 3062; 3063; AVX-LABEL: load_sext_2i32_to_2i64: 3064; AVX: # %bb.0: # %entry 3065; AVX-NEXT: vpmovsxdq (%rdi), %xmm0 3066; AVX-NEXT: retq 3067; 3068; X86-SSE2-LABEL: load_sext_2i32_to_2i64: 3069; X86-SSE2: # %bb.0: # %entry 3070; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 3071; X86-SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 3072; X86-SSE2-NEXT: pxor %xmm1, %xmm1 3073; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm1 3074; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 3075; X86-SSE2-NEXT: retl 3076; 3077; X86-SSE41-LABEL: load_sext_2i32_to_2i64: 3078; X86-SSE41: # %bb.0: # %entry 3079; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3080; X86-SSE41-NEXT: pmovsxdq (%eax), %xmm0 3081; X86-SSE41-NEXT: retl 3082entry: 3083 %X = load <2 x i32>, ptr %ptr 3084 %Y = sext <2 x i32> %X to <2 x i64> 3085 ret <2 x i64> %Y 3086} 3087 3088define <4 x i64> @load_sext_4i32_to_4i64(ptr%ptr) { 3089; SSE2-LABEL: load_sext_4i32_to_4i64: 3090; SSE2: # %bb.0: # %entry 3091; SSE2-NEXT: movdqa (%rdi), %xmm0 3092; SSE2-NEXT: pxor %xmm2, %xmm2 3093; SSE2-NEXT: pxor %xmm3, %xmm3 3094; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 3095; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3096; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3097; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3098; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3099; SSE2-NEXT: retq 3100; 3101; SSSE3-LABEL: load_sext_4i32_to_4i64: 3102; SSSE3: # %bb.0: # %entry 3103; SSSE3-NEXT: movdqa (%rdi), %xmm0 3104; SSSE3-NEXT: pxor %xmm2, %xmm2 3105; SSSE3-NEXT: pxor %xmm3, %xmm3 3106; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 3107; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3108; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3109; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 3110; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3111; SSSE3-NEXT: retq 3112; 3113; SSE41-LABEL: load_sext_4i32_to_4i64: 3114; SSE41: # %bb.0: # 
%entry 3115; SSE41-NEXT: pmovsxdq (%rdi), %xmm0 3116; SSE41-NEXT: pmovsxdq 8(%rdi), %xmm1 3117; SSE41-NEXT: retq 3118; 3119; AVX1-LABEL: load_sext_4i32_to_4i64: 3120; AVX1: # %bb.0: # %entry 3121; AVX1-NEXT: vpmovsxdq 8(%rdi), %xmm0 3122; AVX1-NEXT: vpmovsxdq (%rdi), %xmm1 3123; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3124; AVX1-NEXT: retq 3125; 3126; AVX2-LABEL: load_sext_4i32_to_4i64: 3127; AVX2: # %bb.0: # %entry 3128; AVX2-NEXT: vpmovsxdq (%rdi), %ymm0 3129; AVX2-NEXT: retq 3130; 3131; AVX512-LABEL: load_sext_4i32_to_4i64: 3132; AVX512: # %bb.0: # %entry 3133; AVX512-NEXT: vpmovsxdq (%rdi), %ymm0 3134; AVX512-NEXT: retq 3135; 3136; X86-SSE2-LABEL: load_sext_4i32_to_4i64: 3137; X86-SSE2: # %bb.0: # %entry 3138; X86-SSE2-NEXT: movl {{[0-9]+}}(%esp), %eax 3139; X86-SSE2-NEXT: movdqa (%eax), %xmm0 3140; X86-SSE2-NEXT: pxor %xmm2, %xmm2 3141; X86-SSE2-NEXT: pxor %xmm3, %xmm3 3142; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 3143; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3144; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3145; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3146; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3147; X86-SSE2-NEXT: retl 3148; 3149; X86-SSE41-LABEL: load_sext_4i32_to_4i64: 3150; X86-SSE41: # %bb.0: # %entry 3151; X86-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3152; X86-SSE41-NEXT: pmovsxdq (%eax), %xmm0 3153; X86-SSE41-NEXT: pmovsxdq 8(%eax), %xmm1 3154; X86-SSE41-NEXT: retl 3155entry: 3156 %X = load <4 x i32>, ptr %ptr 3157 %Y = sext <4 x i32> %X to <4 x i64> 3158 ret <4 x i64> %Y 3159} 3160 3161define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp { 3162; SSE2-LABEL: sext_2i8_to_i32: 3163; SSE2: # %bb.0: # %entry 3164; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3165; SSE2-NEXT: psraw $8, %xmm0 3166; SSE2-NEXT: movd %xmm0, %eax 3167; SSE2-NEXT: retq 3168; 3169; SSSE3-LABEL: sext_2i8_to_i32: 3170; SSSE3: # %bb.0: # %entry 3171; SSSE3-NEXT: 
punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3172; SSSE3-NEXT: psraw $8, %xmm0 3173; SSSE3-NEXT: movd %xmm0, %eax 3174; SSSE3-NEXT: retq 3175; 3176; SSE41-LABEL: sext_2i8_to_i32: 3177; SSE41: # %bb.0: # %entry 3178; SSE41-NEXT: pmovsxbw %xmm0, %xmm0 3179; SSE41-NEXT: movd %xmm0, %eax 3180; SSE41-NEXT: retq 3181; 3182; AVX-LABEL: sext_2i8_to_i32: 3183; AVX: # %bb.0: # %entry 3184; AVX-NEXT: vpmovsxbw %xmm0, %xmm0 3185; AVX-NEXT: vmovd %xmm0, %eax 3186; AVX-NEXT: retq 3187; 3188; X86-SSE2-LABEL: sext_2i8_to_i32: 3189; X86-SSE2: # %bb.0: # %entry 3190; X86-SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3191; X86-SSE2-NEXT: psraw $8, %xmm0 3192; X86-SSE2-NEXT: movd %xmm0, %eax 3193; X86-SSE2-NEXT: retl 3194; 3195; X86-SSE41-LABEL: sext_2i8_to_i32: 3196; X86-SSE41: # %bb.0: # %entry 3197; X86-SSE41-NEXT: pmovsxbw %xmm0, %xmm0 3198; X86-SSE41-NEXT: movd %xmm0, %eax 3199; X86-SSE41-NEXT: retl 3200entry: 3201 %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 3202 %Ex = sext <2 x i8> %Shuf to <2 x i16> 3203 %Bc = bitcast <2 x i16> %Ex to i32 3204 ret i32 %Bc 3205} 3206 3207define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) { 3208; SSE2-LABEL: sext_4i1_to_4i64: 3209; SSE2: # %bb.0: 3210; SSE2-NEXT: pslld $31, %xmm0 3211; SSE2-NEXT: psrad $31, %xmm0 3212; SSE2-NEXT: pxor %xmm2, %xmm2 3213; SSE2-NEXT: pxor %xmm3, %xmm3 3214; SSE2-NEXT: pcmpgtd %xmm0, %xmm3 3215; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3216; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3217; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3218; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3219; SSE2-NEXT: retq 3220; 3221; SSSE3-LABEL: sext_4i1_to_4i64: 3222; SSSE3: # %bb.0: 3223; SSSE3-NEXT: pslld $31, %xmm0 3224; SSSE3-NEXT: psrad $31, %xmm0 3225; SSSE3-NEXT: pxor %xmm2, %xmm2 3226; SSSE3-NEXT: pxor %xmm3, %xmm3 3227; SSSE3-NEXT: pcmpgtd %xmm0, %xmm3 3228; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = 
xmm0[2,3,2,3] 3229; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3230; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 3231; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3232; SSSE3-NEXT: retq 3233; 3234; SSE41-LABEL: sext_4i1_to_4i64: 3235; SSE41: # %bb.0: 3236; SSE41-NEXT: pslld $31, %xmm0 3237; SSE41-NEXT: psrad $31, %xmm0 3238; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 3239; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 3240; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 3241; SSE41-NEXT: movdqa %xmm2, %xmm0 3242; SSE41-NEXT: retq 3243; 3244; AVX1-LABEL: sext_4i1_to_4i64: 3245; AVX1: # %bb.0: 3246; AVX1-NEXT: vpslld $31, %xmm0, %xmm0 3247; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0 3248; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 3249; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 3250; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 3251; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3252; AVX1-NEXT: retq 3253; 3254; AVX2-LABEL: sext_4i1_to_4i64: 3255; AVX2: # %bb.0: 3256; AVX2-NEXT: vpslld $31, %xmm0, %xmm0 3257; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0 3258; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 3259; AVX2-NEXT: retq 3260; 3261; AVX512-LABEL: sext_4i1_to_4i64: 3262; AVX512: # %bb.0: 3263; AVX512-NEXT: vpslld $31, %xmm0, %xmm0 3264; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0 3265; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0 3266; AVX512-NEXT: retq 3267; 3268; X86-SSE2-LABEL: sext_4i1_to_4i64: 3269; X86-SSE2: # %bb.0: 3270; X86-SSE2-NEXT: pslld $31, %xmm0 3271; X86-SSE2-NEXT: psrad $31, %xmm0 3272; X86-SSE2-NEXT: pxor %xmm2, %xmm2 3273; X86-SSE2-NEXT: pxor %xmm3, %xmm3 3274; X86-SSE2-NEXT: pcmpgtd %xmm0, %xmm3 3275; X86-SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,2,3] 3276; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 3277; X86-SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3278; X86-SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 3279; X86-SSE2-NEXT: retl 3280; 3281; X86-SSE41-LABEL: sext_4i1_to_4i64: 3282; X86-SSE41: # %bb.0: 3283; X86-SSE41-NEXT: pslld 
$31, %xmm0 3284; X86-SSE41-NEXT: psrad $31, %xmm0 3285; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm2 3286; X86-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,2,3] 3287; X86-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 3288; X86-SSE41-NEXT: movdqa %xmm2, %xmm0 3289; X86-SSE41-NEXT: retl 3290 %extmask = sext <4 x i1> %mask to <4 x i64> 3291 ret <4 x i64> %extmask 3292} 3293 3294define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) { 3295; SSE2-LABEL: sext_4i8_to_4i64: 3296; SSE2: # %bb.0: 3297; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3298; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3299; SSE2-NEXT: psrad $24, %xmm1 3300; SSE2-NEXT: pxor %xmm2, %xmm2 3301; SSE2-NEXT: pcmpgtd %xmm1, %xmm2 3302; SSE2-NEXT: movdqa %xmm1, %xmm0 3303; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3304; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 3305; SSE2-NEXT: retq 3306; 3307; SSSE3-LABEL: sext_4i8_to_4i64: 3308; SSSE3: # %bb.0: 3309; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3310; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 3311; SSSE3-NEXT: psrad $24, %xmm1 3312; SSSE3-NEXT: pxor %xmm2, %xmm2 3313; SSSE3-NEXT: pcmpgtd %xmm1, %xmm2 3314; SSSE3-NEXT: movdqa %xmm1, %xmm0 3315; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 3316; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 3317; SSSE3-NEXT: retq 3318; 3319; SSE41-LABEL: sext_4i8_to_4i64: 3320; SSE41: # %bb.0: 3321; SSE41-NEXT: pmovsxbq %xmm0, %xmm2 3322; SSE41-NEXT: psrld $16, %xmm0 3323; SSE41-NEXT: pmovsxbq %xmm0, %xmm1 3324; SSE41-NEXT: movdqa %xmm2, %xmm0 3325; SSE41-NEXT: retq 3326; 3327; AVX1-LABEL: sext_4i8_to_4i64: 3328; AVX1: # %bb.0: 3329; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1 3330; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0 3331; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0 3332; AVX1-NEXT: 
vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sext_4i8_to_4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbq %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sext_4i8_to_4i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovsxbq %xmm0, %ymm0
; AVX512-NEXT:    retq
;
; X86-SSE2-LABEL: sext_4i8_to_4i64:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; X86-SSE2-NEXT:    psrad $24, %xmm1
; X86-SSE2-NEXT:    pxor %xmm2, %xmm2
; X86-SSE2-NEXT:    pcmpgtd %xmm1, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm0
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE2-NEXT:    punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3]
; X86-SSE2-NEXT:    retl
;
; X86-SSE41-LABEL: sext_4i8_to_4i64:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    pmovsxbq %xmm0, %xmm2
; X86-SSE41-NEXT:    psrld $16, %xmm0
; X86-SSE41-NEXT:    pmovsxbq %xmm0, %xmm1
; X86-SSE41-NEXT:    movdqa %xmm2, %xmm0
; X86-SSE41-NEXT:    retl
  %extmask = sext <4 x i8> %mask to <4 x i64>
  ret <4 x i64> %extmask
}

; Compare two <32 x i16> vectors for equality and sign-extend the resulting
; <32 x i1> mask to <32 x i8>.
define <32 x i8> @sext_32xi1_to_32xi8(<32 x i16> %c1, <32 x i16> %c2)nounwind {
; SSE-LABEL: sext_32xi1_to_32xi8:
; SSE:       # %bb.0:
; SSE-NEXT:    pcmpeqw %xmm5, %xmm1
; SSE-NEXT:    pcmpeqw %xmm4, %xmm0
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    pcmpeqw %xmm7, %xmm3
; SSE-NEXT:    pcmpeqw %xmm6, %xmm2
; SSE-NEXT:    packsswb %xmm3, %xmm2
; SSE-NEXT:    movdqa %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: sext_32xi1_to_32xi8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT:    vpcmpeqw %xmm4, %xmm5, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vpacksswb %xmm4, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT:    vpcmpeqw %xmm3, %xmm4, %xmm3
; AVX1-NEXT:    vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sext_32xi1_to_32xi8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpcmpeqw %ymm3, %ymm1, %ymm1
; AVX2-NEXT:    vpcmpeqw %ymm2, %ymm0, %ymm0
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    retq
;
; AVX512F-LABEL: sext_32xi1_to_32xi8:
; AVX512F:       # %bb.0:
; AVX512F-NEXT:    vextracti64x4 $1, %zmm1, %ymm2
; AVX512F-NEXT:    vextracti64x4 $1, %zmm0, %ymm3
; AVX512F-NEXT:    vpcmpeqw %ymm2, %ymm3, %ymm2
; AVX512F-NEXT:    vpcmpeqw %ymm1, %ymm0, %ymm0
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
; AVX512F-NEXT:    vpmovdb %zmm0, %xmm0
; AVX512F-NEXT:    vpmovzxwd {{.*#+}} zmm1 = ymm2[0],zero,ymm2[1],zero,ymm2[2],zero,ymm2[3],zero,ymm2[4],zero,ymm2[5],zero,ymm2[6],zero,ymm2[7],zero,ymm2[8],zero,ymm2[9],zero,ymm2[10],zero,ymm2[11],zero,ymm2[12],zero,ymm2[13],zero,ymm2[14],zero,ymm2[15],zero
; AVX512F-NEXT:    vpmovdb %zmm1, %xmm1
; AVX512F-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT:    retq
;
; AVX512BW-LABEL: sext_32xi1_to_32xi8:
; AVX512BW:       # %bb.0:
; AVX512BW-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0
; AVX512BW-NEXT:    vpmovm2b %k0, %zmm0
; AVX512BW-NEXT:    # kill: def $ymm0 killed $ymm0 killed $zmm0
; AVX512BW-NEXT:    retq
;
; X86-SSE-LABEL: sext_32xi1_to_32xi8:
; X86-SSE:       # %bb.0:
; X86-SSE-NEXT:    pushl %ebp
; X86-SSE-NEXT:    movl %esp, %ebp
; X86-SSE-NEXT:    andl $-16, %esp
; X86-SSE-NEXT:    subl $16, %esp
; X86-SSE-NEXT:    movdqa 8(%ebp), %xmm3
; X86-SSE-NEXT:    pcmpeqw 40(%ebp), %xmm1
; X86-SSE-NEXT:    pcmpeqw 24(%ebp), %xmm0
; X86-SSE-NEXT:    packsswb %xmm1, %xmm0
; X86-SSE-NEXT:    pcmpeqw 72(%ebp), %xmm3
; X86-SSE-NEXT:    pcmpeqw 56(%ebp), %xmm2
; X86-SSE-NEXT:    packsswb %xmm3, %xmm2
; X86-SSE-NEXT:    movdqa %xmm2, %xmm1
; X86-SSE-NEXT:    movl %ebp, %esp
; X86-SSE-NEXT:    popl %ebp
; X86-SSE-NEXT:    retl
  %a = icmp eq <32 x i16> %c1, %c2
  %b = sext <32 x i1> %a to <32 x i8>
  ret <32 x i8> %b
}

; Load a <2 x i8> vector (align 1) from memory, sign-extend it to <2 x i32>,
; and add the extended value to itself.
define <2 x i32> @sext_2i8_to_2i32(ptr %addr) {
; SSE2-LABEL: sext_2i8_to_2i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movzwl (%rdi), %eax
; SSE2-NEXT:    movd %eax, %xmm0
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    psrad $24, %xmm0
; SSE2-NEXT:    paddd %xmm0, %xmm0
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: sext_2i8_to_2i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movzwl (%rdi), %eax
; SSSE3-NEXT:    movd %eax, %xmm0
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT:    psrad $24, %xmm0
; SSSE3-NEXT:    paddd %xmm0, %xmm0
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: sext_2i8_to_2i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movzwl (%rdi), %eax
; SSE41-NEXT:    movd %eax, %xmm0
; SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
; SSE41-NEXT:    paddd %xmm0, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: sext_2i8_to_2i32:
; AVX:       # %bb.0:
; AVX-NEXT:    movzwl (%rdi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    vpmovsxbd %xmm0, %xmm0
; AVX-NEXT:    vpaddd %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-SSE2-LABEL: sext_2i8_to_2i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE2-NEXT:    movzwl (%eax), %eax
; X86-SSE2-NEXT:    movd %eax, %xmm0
; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; X86-SSE2-NEXT:    psrad $24, %xmm0
; X86-SSE2-NEXT:    paddd %xmm0, %xmm0
; X86-SSE2-NEXT:    retl
;
; X86-SSE41-LABEL: sext_2i8_to_2i32:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-SSE41-NEXT:    movzwl (%eax), %eax
; X86-SSE41-NEXT:    movd %eax, %xmm0
; X86-SSE41-NEXT:    pmovsxbd %xmm0, %xmm0
; X86-SSE41-NEXT:    paddd %xmm0, %xmm0
; X86-SSE41-NEXT:    retl
  %x = load <2 x i8>, ptr %addr, align 1
  %y = sext <2 x i8> %x to <2 x i32>
  %z = add <2 x i32>%y, %y
  ret <2 x i32>%z
}

; Load a <4 x i17> vector (17-bit elements) from memory and sign-extend it to
; <4 x i32>.
define <4 x i32> @sext_4i17_to_4i32(ptr %ptr) {
; SSE2-LABEL: sext_4i17_to_4i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movq (%rdi), %rax
; SSE2-NEXT:    movl %eax, %ecx
; SSE2-NEXT:    shll $15, %ecx
; SSE2-NEXT:    sarl $15, %ecx
; SSE2-NEXT:    movd %ecx, %xmm0
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq $17, %rcx
; SSE2-NEXT:    shll $15, %ecx
; SSE2-NEXT:    sarl $15, %ecx
; SSE2-NEXT:    movd %ecx, %xmm1
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    movl 8(%rdi), %ecx
; SSE2-NEXT:    shll $13, %ecx
; SSE2-NEXT:    movq %rax, %rdx
; SSE2-NEXT:    shrq $51, %rdx
; SSE2-NEXT:    orl %ecx, %edx
; SSE2-NEXT:    shll $15, %edx
; SSE2-NEXT:    sarl $15, %edx
; SSE2-NEXT:    movd %edx, %xmm1
; SSE2-NEXT:    shrq $34, %rax
; SSE2-NEXT:    shll $15, %eax
; SSE2-NEXT:    sarl $15, %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: sext_4i17_to_4i32:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    movq (%rdi), %rax
; SSSE3-NEXT:    movl %eax, %ecx
; SSSE3-NEXT:    shll $15, %ecx
; SSSE3-NEXT:    sarl $15, %ecx
; SSSE3-NEXT:    movd %ecx, %xmm0
; SSSE3-NEXT:    movq %rax, %rcx
; SSSE3-NEXT:    shrq $17, %rcx
; SSSE3-NEXT:    shll $15, %ecx
; SSSE3-NEXT:    sarl $15, %ecx
; SSSE3-NEXT:    movd %ecx, %xmm1
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    movl 8(%rdi), %ecx
; SSSE3-NEXT:    shll $13, %ecx
; SSSE3-NEXT:    movq %rax, %rdx
; SSSE3-NEXT:    shrq $51, %rdx
; SSSE3-NEXT:    orl %ecx, %edx
; SSSE3-NEXT:    shll $15, %edx
; SSSE3-NEXT:    sarl $15, %edx
; SSSE3-NEXT:    movd %edx, %xmm1
; SSSE3-NEXT:    shrq $34, %rax
; SSSE3-NEXT:    shll $15, %eax
; SSSE3-NEXT:    sarl $15, %eax
; SSSE3-NEXT:    movd %eax, %xmm2
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSSE3-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: sext_4i17_to_4i32:
; SSE41:       # %bb.0:
; SSE41-NEXT:    movq (%rdi), %rax
; SSE41-NEXT:    movq %rax, %rcx
; SSE41-NEXT:    shrq $17, %rcx
; SSE41-NEXT:    shll $15, %ecx
; SSE41-NEXT:    sarl $15, %ecx
; SSE41-NEXT:    movl %eax, %edx
; SSE41-NEXT:    shll $15, %edx
; SSE41-NEXT:    sarl $15, %edx
; SSE41-NEXT:    movd %edx, %xmm0
; SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
; SSE41-NEXT:    movq %rax, %rcx
; SSE41-NEXT:    shrq $34, %rcx
; SSE41-NEXT:    shll $15, %ecx
; SSE41-NEXT:    sarl $15, %ecx
; SSE41-NEXT:    pinsrd $2, %ecx, %xmm0
; SSE41-NEXT:    movl 8(%rdi), %ecx
; SSE41-NEXT:    shll $13, %ecx
; SSE41-NEXT:    shrq $51, %rax
; SSE41-NEXT:    orl %ecx, %eax
; SSE41-NEXT:    shll $15, %eax
; SSE41-NEXT:    sarl $15, %eax
; SSE41-NEXT:    pinsrd $3, %eax, %xmm0
; SSE41-NEXT:    retq
;
; AVX-LABEL: sext_4i17_to_4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    movq (%rdi), %rax
; AVX-NEXT:    movq %rax, %rcx
; AVX-NEXT:    shrq $17, %rcx
; AVX-NEXT:    shll $15, %ecx
; AVX-NEXT:    sarl $15, %ecx
; AVX-NEXT:    movl %eax, %edx
; AVX-NEXT:    shll $15, %edx
; AVX-NEXT:    sarl $15, %edx
; AVX-NEXT:    vmovd %edx, %xmm0
; AVX-NEXT:    vpinsrd $1, %ecx, %xmm0, %xmm0
; AVX-NEXT:    movq %rax, %rcx
; AVX-NEXT:    shrq $34, %rcx
; AVX-NEXT:    shll $15, %ecx
; AVX-NEXT:    sarl $15, %ecx
; AVX-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0
; AVX-NEXT:    movl 8(%rdi), %ecx
; AVX-NEXT:    shll $13, %ecx
; AVX-NEXT:    shrq $51, %rax
; AVX-NEXT:    orl %ecx, %eax
; AVX-NEXT:    shll $15, %eax
; AVX-NEXT:    sarl $15, %eax
; AVX-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; X86-SSE2-LABEL: sext_4i17_to_4i32:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE2-NEXT:    movl (%edx), %ecx
; X86-SSE2-NEXT:    movl 4(%edx), %eax
; X86-SSE2-NEXT:    movl 8(%edx), %edx
; X86-SSE2-NEXT:    shldl $13, %eax, %edx
; X86-SSE2-NEXT:    shll $15, %edx
; X86-SSE2-NEXT:    sarl $15, %edx
; X86-SSE2-NEXT:    movd %edx, %xmm0
; X86-SSE2-NEXT:    movl %eax, %edx
; X86-SSE2-NEXT:    shll $13, %edx
; X86-SSE2-NEXT:    sarl $15, %edx
; X86-SSE2-NEXT:    movd %edx, %xmm1
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; X86-SSE2-NEXT:    shldl $15, %ecx, %eax
; X86-SSE2-NEXT:    shll $15, %ecx
; X86-SSE2-NEXT:    sarl $15, %ecx
; X86-SSE2-NEXT:    movd %ecx, %xmm0
; X86-SSE2-NEXT:    shll $15, %eax
; X86-SSE2-NEXT:    sarl $15, %eax
; X86-SSE2-NEXT:    movd %eax, %xmm2
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; X86-SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; X86-SSE2-NEXT:    retl
;
; X86-SSE41-LABEL: sext_4i17_to_4i32:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    pushl %esi
; X86-SSE41-NEXT:    .cfi_def_cfa_offset 8
; X86-SSE41-NEXT:    .cfi_offset %esi, -8
; X86-SSE41-NEXT:    movl {{[0-9]+}}(%esp), %edx
; X86-SSE41-NEXT:    movl (%edx), %eax
; X86-SSE41-NEXT:    movl 4(%edx), %ecx
; X86-SSE41-NEXT:    movl %ecx, %esi
; X86-SSE41-NEXT:    movl 8(%edx), %edx
; X86-SSE41-NEXT:    shldl $13, %ecx, %edx
; X86-SSE41-NEXT:    shldl $15, %eax, %ecx
; X86-SSE41-NEXT:    shll $15, %ecx
; X86-SSE41-NEXT:    sarl $15, %ecx
; X86-SSE41-NEXT:    shll $15, %eax
; X86-SSE41-NEXT:    sarl $15, %eax
; X86-SSE41-NEXT:    movd %eax, %xmm0
; X86-SSE41-NEXT:    pinsrd $1, %ecx, %xmm0
; X86-SSE41-NEXT:    shll $13, %esi
; X86-SSE41-NEXT:    sarl $15, %esi
; X86-SSE41-NEXT:    pinsrd $2, %esi, %xmm0
; X86-SSE41-NEXT:    shll $15, %edx
; X86-SSE41-NEXT:    sarl $15, %edx
; X86-SSE41-NEXT:    pinsrd $3, %edx, %xmm0
; X86-SSE41-NEXT:    popl %esi
; X86-SSE41-NEXT:    .cfi_def_cfa_offset 4
; X86-SSE41-NEXT:    retl
  %a = load <4 x i17>, ptr %ptr
  %b = sext <4 x i17> %a to <4 x i32>
  ret <4 x i32> %b
}

; Truncate the i32 argument to i6, splat it across <8 x i6>, add the constant
; vector <0, 1, 2, 3, 4, 5, 6, 7>, and sign-extend the result to <8 x i64>.
define <8 x i64> @sext_8i6_to_8i64(i32 %x) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_8i6_to_8i64:
; SSE2:       # %bb.0: # %entry
; SSE2-NEXT:    movd %edi, %xmm0
; SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE2-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; SSE2-NEXT:    psllq $58, %xmm0
; SSE2-NEXT:    movdqa %xmm0, %xmm1
; SSE2-NEXT:    psrad $31, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT:    psrad $26, %xmm0
; SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
; SSE2-NEXT:    psllq $58, %xmm1
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-NEXT:    psrad $26, %xmm1
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
; SSE2-NEXT:    psllq $58, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm4
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; SSE2-NEXT:    psrad $26, %xmm2
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
; SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
; SSE2-NEXT:    psllq $58, %xmm3
; SSE2-NEXT:    movdqa %xmm3, %xmm4
; SSE2-NEXT:    psrad $31, %xmm4
; SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; SSE2-NEXT:    psrad $26, %xmm3
; SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: sext_8i6_to_8i64:
; SSSE3:       # %bb.0: # %entry
; SSSE3-NEXT:    movd %edi, %xmm0
; SSSE3-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSSE3-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
; SSSE3-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; SSSE3-NEXT:    psllq $58, %xmm0
; SSSE3-NEXT:    movdqa %xmm0, %xmm1
; SSSE3-NEXT:    psrad $31, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSSE3-NEXT:    psrad $26, %xmm0
; SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
; SSSE3-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
; SSSE3-NEXT:    psllq $58, %xmm1
; SSSE3-NEXT:    movdqa %xmm1, %xmm2
; SSSE3-NEXT:    psrad $31, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSSE3-NEXT:    psrad $26, %xmm1
; SSSE3-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
; SSSE3-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
; SSSE3-NEXT:    psllq $58, %xmm2
; SSSE3-NEXT:    movdqa %xmm2, %xmm4
; SSSE3-NEXT:    psrad $31, %xmm4
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; SSSE3-NEXT:    psrad $26, %xmm2
; SSSE3-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
; SSSE3-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
; SSSE3-NEXT:    psllq $58, %xmm3
; SSSE3-NEXT:    movdqa %xmm3, %xmm4
; SSSE3-NEXT:    psrad $31, %xmm4
; SSSE3-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; SSSE3-NEXT:    psrad $26, %xmm3
; SSSE3-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; SSSE3-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: sext_8i6_to_8i64:
; SSE41:       # %bb.0: # %entry
; SSE41-NEXT:    movd %edi, %xmm0
; SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; SSE41-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm3
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; SSE41-NEXT:    psllq $58, %xmm0
; SSE41-NEXT:    movdqa %xmm0, %xmm1
; SSE41-NEXT:    psrad $31, %xmm1
; SSE41-NEXT:    psrad $26, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; SSE41-NEXT:    psllq $58, %xmm1
; SSE41-NEXT:    movdqa %xmm1, %xmm2
; SSE41-NEXT:    psrad $31, %xmm2
; SSE41-NEXT:    psrad $26, %xmm1
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; SSE41-NEXT:    psllq $58, %xmm2
; SSE41-NEXT:    movdqa %xmm2, %xmm4
; SSE41-NEXT:    psrad $31, %xmm4
; SSE41-NEXT:    psrad $26, %xmm2
; SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
; SSE41-NEXT:    pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; SSE41-NEXT:    psllq $58, %xmm3
; SSE41-NEXT:    movdqa %xmm3, %xmm4
; SSE41-NEXT:    psrad $31, %xmm4
; SSE41-NEXT:    psrad $26, %xmm3
; SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
; SSE41-NEXT:    retq
;
; AVX1-LABEL: sext_8i6_to_8i64:
; AVX1:       # %bb.0: # %entry
; AVX1-NEXT:    vmovd %edi, %xmm0
; AVX1-NEXT:    vpshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,0,0]
; AVX1-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX1-NEXT:    vpsllw $10, %xmm0, %xmm0
; AVX1-NEXT:    vpsraw $10, %xmm0, %xmm1
; AVX1-NEXT:    vpmovsxwq %xmm1, %xmm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[1,1,1,1]
; AVX1-NEXT:    vpmovsxwq %xmm2, %xmm2
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm1[2,3,2,3]
; AVX1-NEXT:    vpmovsxwq %xmm2, %xmm2
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[3,3,3,3]
; AVX1-NEXT:    vpmovsxwq %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm2, %ymm1
; AVX1-NEXT:    retq
;
; AVX2-LABEL: sext_8i6_to_8i64:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovd %edi, %xmm0
; AVX2-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX2-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpsllw $10, %xmm0, %xmm0
; AVX2-NEXT:    vpsraw $10, %xmm0, %xmm1
; AVX2-NEXT:    vpmovsxwq %xmm1, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; AVX2-NEXT:    vpmovsxwq %xmm1, %ymm1
; AVX2-NEXT:    retq
;
; AVX512-LABEL: sext_8i6_to_8i64:
; AVX512:       # %bb.0: # %entry
; AVX512-NEXT:    vmovd %edi, %xmm0
; AVX512-NEXT:    vpbroadcastw %xmm0, %xmm0
; AVX512-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX512-NEXT:    vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT:    vpsllq $58, %zmm0, %zmm0
; AVX512-NEXT:    vpsraq $58, %zmm0, %zmm0
; AVX512-NEXT:    retq
;
; X86-SSE2-LABEL: sext_8i6_to_8i64:
; X86-SSE2:       # %bb.0: # %entry
; X86-SSE2-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE2-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; X86-SSE2-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm3[0,0,0,0]
; X86-SSE2-NEXT:    pshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,5,5,5,5]
; X86-SSE2-NEXT:    psllq $58, %xmm0
; X86-SSE2-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE2-NEXT:    psrad $31, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    psrad $26, %xmm0
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
; X86-SSE2-NEXT:    pshufhw {{.*#+}} xmm1 = xmm1[0,1,2,3,5,5,5,5]
; X86-SSE2-NEXT:    psllq $58, %xmm1
; X86-SSE2-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE2-NEXT:    psrad $31, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-SSE2-NEXT:    psrad $26, %xmm1
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,2,2,2]
; X86-SSE2-NEXT:    pshufhw {{.*#+}} xmm2 = xmm2[0,1,2,3,5,5,5,5]
; X86-SSE2-NEXT:    psllq $58, %xmm2
; X86-SSE2-NEXT:    movdqa %xmm2, %xmm4
; X86-SSE2-NEXT:    psrad $31, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; X86-SSE2-NEXT:    psrad $26, %xmm2
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
; X86-SSE2-NEXT:    pshufhw {{.*#+}} xmm3 = xmm3[0,1,2,3,5,5,5,5]
; X86-SSE2-NEXT:    psllq $58, %xmm3
; X86-SSE2-NEXT:    movdqa %xmm3, %xmm4
; X86-SSE2-NEXT:    psrad $31, %xmm4
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm4 = xmm4[1,3,2,3]
; X86-SSE2-NEXT:    psrad $26, %xmm3
; X86-SSE2-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,3,2,3]
; X86-SSE2-NEXT:    punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; X86-SSE2-NEXT:    retl
;
; X86-SSE41-LABEL: sext_8i6_to_8i64:
; X86-SSE41:       # %bb.0: # %entry
; X86-SSE41-NEXT:    movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; X86-SSE41-NEXT:    pshuflw {{.*#+}} xmm0 = xmm0[0,0,0,0,4,5,6,7]
; X86-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm0[0,0,0,0]
; X86-SSE41-NEXT:    paddw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm3
; X86-SSE41-NEXT:    pmovzxwq {{.*#+}} xmm0 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; X86-SSE41-NEXT:    psllq $58, %xmm0
; X86-SSE41-NEXT:    movdqa %xmm0, %xmm1
; X86-SSE41-NEXT:    psrad $31, %xmm1
; X86-SSE41-NEXT:    psrad $26, %xmm0
; X86-SSE41-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3]
; X86-SSE41-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; X86-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm3[1,1,1,1]
; X86-SSE41-NEXT:    pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero
; X86-SSE41-NEXT:    psllq $58, %xmm1
; X86-SSE41-NEXT:    movdqa %xmm1, %xmm2
; X86-SSE41-NEXT:    psrad $31, %xmm2
; X86-SSE41-NEXT:    psrad $26, %xmm1
; X86-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3]
; X86-SSE41-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0,1],xmm2[2,3],xmm1[4,5],xmm2[6,7]
; X86-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm3[2,3,2,3]
; X86-SSE41-NEXT:    pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero
; X86-SSE41-NEXT:    psllq $58, %xmm2
; X86-SSE41-NEXT:    movdqa %xmm2, %xmm4
; X86-SSE41-NEXT:    psrad $31, %xmm4
; X86-SSE41-NEXT:    psrad $26, %xmm2
; X86-SSE41-NEXT:    pshufd {{.*#+}} xmm2 = xmm2[1,1,3,3]
; X86-SSE41-NEXT:    pblendw {{.*#+}} xmm2 = xmm2[0,1],xmm4[2,3],xmm2[4,5],xmm4[6,7]
; X86-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[3,3,3,3]
; X86-SSE41-NEXT:    pmovzxwq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,xmm3[1],zero,zero,zero
; X86-SSE41-NEXT:    psllq $58, %xmm3
; X86-SSE41-NEXT:    movdqa %xmm3, %xmm4
; X86-SSE41-NEXT:    psrad $31, %xmm4
; X86-SSE41-NEXT:    psrad $26, %xmm3
; X86-SSE41-NEXT:    pshufd {{.*#+}} xmm3 = xmm3[1,1,3,3]
; X86-SSE41-NEXT:    pblendw {{.*#+}} xmm3 = xmm3[0,1],xmm4[2,3],xmm3[4,5],xmm4[6,7]
; X86-SSE41-NEXT:    retl
entry:
  %a = trunc i32 %x to i6
  %b = insertelement <8 x i6> undef, i6 %a, i32 0
  %c = shufflevector <8 x i6> %b, <8 x i6> undef, <8 x i32> zeroinitializer
  %d = add <8 x i6> %c, <i6 0, i6 1, i6 2, i6 3, i6 4, i6 5, i6 6, i6 7>
  %e = sext <8 x i6> %d to <8 x i64>
  ret <8 x i64> %e
}

; Zero-extend <8 x i8> to <8 x i16>, negate it with `sub nsw` from zero, and
; sign-extend the result to <8 x i32>.
define <8 x i32> @zext_negate_sext(<8 x i8> %x) {
; SSE2-LABEL: zext_negate_sext:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    psubw %xmm0, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_negate_sext:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    psubw %xmm0, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    psrad $16, %xmm0
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    psrad $16, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_negate_sext:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pxor %xmm1, %xmm1
; SSE41-NEXT:    psubw %xmm0, %xmm1
; SSE41-NEXT:    pmovsxwd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT:    pmovsxwd %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_negate_sext:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpsubd %xmm1, %xmm2, %xmm1
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpsubd %xmm0, %xmm2, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_negate_sext:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_negate_sext:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpsubd %ymm0, %ymm1, %ymm0
; AVX512-NEXT:    retq
;
; X86-SSE2-LABEL: zext_negate_sext:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-SSE2-NEXT:    psubw %xmm0, %xmm1
; X86-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-SSE2-NEXT:    psrad $16, %xmm0
; X86-SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT:    psrad $16, %xmm1
; X86-SSE2-NEXT:    retl
;
; X86-SSE41-LABEL: zext_negate_sext:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-SSE41-NEXT:    pxor %xmm1, %xmm1
; X86-SSE41-NEXT:    psubw %xmm0, %xmm1
; X86-SSE41-NEXT:    pmovsxwd %xmm1, %xmm0
; X86-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-SSE41-NEXT:    pmovsxwd %xmm1, %xmm1
; X86-SSE41-NEXT:    retl
  %z = zext <8 x i8> %x to <8 x i16>
  %neg = sub nsw <8 x i16> zeroinitializer, %z
  %r = sext <8 x i16> %neg to <8 x i32>
  ret <8 x i32> %r
}

; Zero-extend <8 x i8> to <8 x i16>, add -1 to every lane, and sign-extend the
; result to <8 x i32>.
; (review) "decremenet" in the function name looks like a misspelling of
; "decrement"; it is left unchanged because the generated labels use it.
define <8 x i32> @zext_decremenet_sext(<8 x i8> %x) {
; SSE2-LABEL: zext_decremenet_sext:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm1
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE2-NEXT:    paddw %xmm0, %xmm1
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSE2-NEXT:    psrad $16, %xmm0
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    retq
;
; SSSE3-LABEL: zext_decremenet_sext:
; SSSE3:       # %bb.0:
; SSSE3-NEXT:    pxor %xmm1, %xmm1
; SSSE3-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT:    pcmpeqd %xmm1, %xmm1
; SSSE3-NEXT:    paddw %xmm0, %xmm1
; SSSE3-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; SSSE3-NEXT:    psrad $16, %xmm0
; SSSE3-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; SSSE3-NEXT:    psrad $16, %xmm1
; SSSE3-NEXT:    retq
;
; SSE41-LABEL: zext_decremenet_sext:
; SSE41:       # %bb.0:
; SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; SSE41-NEXT:    paddw %xmm0, %xmm1
; SSE41-NEXT:    pmovsxwd %xmm1, %xmm0
; SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; SSE41-NEXT:    pmovsxwd %xmm1, %xmm1
; SSE41-NEXT:    retq
;
; AVX1-LABEL: zext_decremenet_sext:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[1,1,1,1]
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero
; AVX1-NEXT:    vpcmpeqd %xmm2, %xmm2, %xmm2
; AVX1-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; AVX1-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: zext_decremenet_sext:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX2-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: zext_decremenet_sext:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
; AVX512-NEXT:    vpcmpeqd %ymm1, %ymm1, %ymm1
; AVX512-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX512-NEXT:    retq
;
; X86-SSE2-LABEL: zext_decremenet_sext:
; X86-SSE2:       # %bb.0:
; X86-SSE2-NEXT:    pxor %xmm1, %xmm1
; X86-SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; X86-SSE2-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE2-NEXT:    paddw %xmm0, %xmm1
; X86-SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-SSE2-NEXT:    psrad $16, %xmm0
; X86-SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4,4,5,5,6,6,7,7]
; X86-SSE2-NEXT:    psrad $16, %xmm1
; X86-SSE2-NEXT:    retl
;
; X86-SSE41-LABEL: zext_decremenet_sext:
; X86-SSE41:       # %bb.0:
; X86-SSE41-NEXT:    pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-SSE41-NEXT:    pcmpeqd %xmm1, %xmm1
; X86-SSE41-NEXT:    paddw %xmm0, %xmm1
; X86-SSE41-NEXT:    pmovsxwd %xmm1, %xmm0
; X86-SSE41-NEXT:    pshufd {{.*#+}} xmm1 = xmm1[2,3,2,3]
; X86-SSE41-NEXT:    pmovsxwd %xmm1, %xmm1
; X86-SSE41-NEXT:    retl
  %z = zext <8 x i8> %x to <8 x i16>
  %dec = add <8 x i16> %z, <i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1, i16 -1>
  %r = sext <8 x i16> %dec to <8 x i32>
  ret <8 x i32> %r
}