; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW
;
; Just one 32-bit run to make sure we do reasonable things there.
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=X32-SSE41

define <8 x i16> @sext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_16i8_to_8i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: psraw $8, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_16i8_to_8i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: psraw $8, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_16i8_to_8i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxbw %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sext_16i8_to_8i16:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX-NEXT: retq
;
; X32-SSE41-LABEL: sext_16i8_to_8i16:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0
; X32-SSE41-NEXT: retl
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = sext <8 x i8> %B to <8 x i16>
  ret <8 x i16> %C
}

define <16 x i16> @sext_16i8_to_16i16(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_16i8_to_16i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT: psraw $8, %xmm2
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
; SSE2-NEXT: psraw $8, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_16i8_to_16i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSSE3-NEXT: psraw $8, %xmm2
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
; SSSE3-NEXT: psraw $8, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_16i8_to_16i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxbw %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovsxbw %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_16i8_to_16i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_16i8_to_16i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: sext_16i8_to_16i16:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovsxbw %xmm0, %ymm0
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: sext_16i8_to_16i16:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm2
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm1
; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
; X32-SSE41-NEXT: retl
entry:
  %B = sext <16 x i8> %A to <16 x i16>
  ret <16 x i16> %B
}

define <32 x i16> @sext_32i8_to_32i16(<32 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_32i8_to_32i16:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
; SSE2-NEXT: psraw $8, %xmm4
; SSE2-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
; SSE2-NEXT: psraw $8, %xmm5
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSE2-NEXT: psraw $8, %xmm2
; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15]
; SSE2-NEXT: psraw $8, %xmm3
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm5, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_32i8_to_32i16:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
; SSSE3-NEXT: psraw $8, %xmm4
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm5 = xmm5[8],xmm0[8],xmm5[9],xmm0[9],xmm5[10],xmm0[10],xmm5[11],xmm0[11],xmm5[12],xmm0[12],xmm5[13],xmm0[13],xmm5[14],xmm0[14],xmm5[15],xmm0[15]
; SSSE3-NEXT: psraw $8, %xmm5
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7]
; SSSE3-NEXT: psraw $8, %xmm2
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm1[8],xmm3[9],xmm1[9],xmm3[10],xmm1[10],xmm3[11],xmm1[11],xmm3[12],xmm1[12],xmm3[13],xmm1[13],xmm3[14],xmm1[14],xmm3[15],xmm1[15]
; SSSE3-NEXT: psraw $8, %xmm3
; SSSE3-NEXT: movdqa %xmm4, %xmm0
; SSSE3-NEXT: movdqa %xmm5, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_32i8_to_32i16:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxbw %xmm0, %xmm5
; SSE41-NEXT: pmovsxbw %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovsxbw %xmm0, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE41-NEXT: pmovsxbw %xmm0, %xmm3
; SSE41-NEXT: movdqa %xmm5, %xmm0
; SSE41-NEXT: movdqa %xmm4, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_32i8_to_32i16:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxbw %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_32i8_to_32i16:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpmovsxbw %xmm0, %ymm1
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sext_32i8_to_32i16:
; AVX512F: # BB#0: # %entry
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm2
; AVX512F-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX512F-NEXT: vpmovsxbw %xmm0, %ymm1
; AVX512F-NEXT: vmovdqa %ymm2, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: sext_32i8_to_32i16:
; AVX512BW: # BB#0: # %entry
; AVX512BW-NEXT: vpmovsxbw %ymm0, %zmm0
; AVX512BW-NEXT: retq
;
; X32-SSE41-LABEL: sext_32i8_to_32i16:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm5
; X32-SSE41-NEXT: pmovsxbw %xmm1, %xmm2
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm4
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm3
; X32-SSE41-NEXT: movdqa %xmm5, %xmm0
; X32-SSE41-NEXT: movdqa %xmm4, %xmm1
; X32-SSE41-NEXT: retl
entry:
  %B = sext <32 x i8> %A to <32 x i16>
  ret <32 x i16> %B
}

define <4 x i32> @sext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_16i8_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_16i8_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_16i8_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxbd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sext_16i8_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX-NEXT: retq
;
; X32-SSE41-LABEL: sext_16i8_to_4i32:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm0
; X32-SSE41-NEXT: retl
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = sext <4 x i8> %B to <4 x i32>
  ret <4 x i32> %C
}

define <8 x i32> @sext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_16i8_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: psrad $24, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: psrad $24, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_16i8_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,4,u,u,u,5,u,u,u,6,u,u,u,7]
; SSSE3-NEXT: psrad $24, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_16i8_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxbd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovsxbd %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_16i8_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_16i8_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: sext_16i8_to_8i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovsxbd %xmm0, %ymm0
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: sext_16i8_to_8i32:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm2
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm1
; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
; X32-SSE41-NEXT: retl
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = sext <8 x i8> %B to <8 x i32>
  ret <8 x i32> %C
}

define <16 x i32> @sext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_16i8_to_16i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3]
; SSE2-NEXT: psrad $24, %xmm4
; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: psrad $24, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT: psrad $24, %xmm1
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3]
; SSE2-NEXT: psrad $24, %xmm3
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_16i8_to_16i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm3
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm3[8],xmm1[9],xmm3[9],xmm1[10],xmm3[10],xmm1[11],xmm3[11],xmm1[12],xmm3[12],xmm1[13],xmm3[13],xmm1[14],xmm3[14],xmm1[15],xmm3[15]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT: psrad $24, %xmm2
; SSSE3-NEXT: movdqa %xmm3, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,4,u,u,u,5,u,u,u,6,u,u,u,7]
; SSSE3-NEXT: psrad $24, %xmm1
; SSSE3-NEXT: pshufb {{.*#+}} xmm3 = xmm3[u,u,u,12,u,u,u,13,u,u,u,14,u,u,u,15]
; SSSE3-NEXT: psrad $24, %xmm3
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_16i8_to_16i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxbd %xmm0, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovsxbd %xmm1, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovsxbd %xmm2, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; SSE41-NEXT: pmovsxbd %xmm0, %xmm3
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_16i8_to_16i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxbd %xmm1, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; AVX1-NEXT: vpmovsxbd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_16i8_to_16i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX2-NEXT: vpmovsxbd %xmm0, %ymm1
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: sext_16i8_to_16i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovsxbd %xmm0, %zmm0
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: sext_16i8_to_16i32:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm4
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; X32-SSE41-NEXT: pmovsxbd %xmm1, %xmm1
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; X32-SSE41-NEXT: pmovsxbd %xmm2, %xmm2
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3]
; X32-SSE41-NEXT: pmovsxbd %xmm0, %xmm3
; X32-SSE41-NEXT: movdqa %xmm4, %xmm0
; X32-SSE41-NEXT: retl
entry:
  %B = sext <16 x i8> %A to <16 x i32>
  ret <16 x i32> %B
}

define <2 x i64> @sext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_16i8_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_16i8_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_16i8_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxbq %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sext_16i8_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovsxbq %xmm0, %xmm0
; AVX-NEXT: retq
;
; X32-SSE41-LABEL: sext_16i8_to_2i64:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm0
; X32-SSE41-NEXT: retl
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
  %C = sext <2 x i8> %B to <2 x i64>
  ret <2 x i64> %C
}

define <4 x i64> @sext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_16i8_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: psrad $24, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: psrld $16, %xmm0
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: psrad $24, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_16i8_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[u,u,u,2,u,u,u,3,u,u,u],zero,xmm1[u,u,u],zero
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: psrad $24, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_16i8_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxbq %xmm0, %xmm2
; SSE41-NEXT: psrld $16, %xmm0
; SSE41-NEXT: pmovsxbq %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_16i8_to_4i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_16i8_to_4i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovsxbq %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: sext_16i8_to_4i64:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovsxbq %xmm0, %ymm0
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: sext_16i8_to_4i64:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm2
; X32-SSE41-NEXT: psrld $16, %xmm0
; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm1
; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
; X32-SSE41-NEXT: retl
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = sext <4 x i8> %B to <4 x i64>
  ret <4 x i64> %C
}

define <8 x i64> @sext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_16i8_to_8i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3]
; SSE2-NEXT: psrld $16, %xmm1
; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: psrad $24, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm3[0],xmm2[1],xmm3[1],xmm2[2],xmm3[2],xmm2[3],xmm3[3],xmm2[4],xmm3[4],xmm2[5],xmm3[5],xmm2[6],xmm3[6],xmm2[7],xmm3[7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa %xmm2, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: psrad $24, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSE2-NEXT: psrld $16, %xmm3
; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa %xmm3, %xmm4
; SSE2-NEXT: psrad $31, %xmm4
; SSE2-NEXT: psrad $24, %xmm3
; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_16i8_to_8i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: movdqa {{.*#+}} xmm2 = <u,u,u,2,u,u,u,3,u,u,u,255,u,u,u,255>
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3],xmm4[4],xmm0[4],xmm4[5],xmm0[5],xmm4[6],xmm0[6],xmm4[7],xmm0[7]
; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm0[1,1,2,3]
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: pshufb %xmm2, %xmm1
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: psrad $31, %xmm0
; SSSE3-NEXT: psrad $24, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3]
; SSSE3-NEXT: movdqa %xmm0, %xmm4
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1]
; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7]
; SSSE3-NEXT: pshufb %xmm2, %xmm3
; SSSE3-NEXT: movdqa %xmm3, %xmm2
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: psrad $24, %xmm3
; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3]
; SSSE3-NEXT: movdqa %xmm2, %xmm4
; SSSE3-NEXT: psrad $31, %xmm4
; SSSE3-NEXT: psrad $24, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_16i8_to_8i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxbq %xmm0, %xmm4
; SSE41-NEXT: movdqa %xmm0, %xmm1
; SSE41-NEXT: psrld $16, %xmm1
; SSE41-NEXT: pmovsxbq %xmm1, %xmm1
; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovsxbq %xmm2, %xmm2
; SSE41-NEXT: psrlq $48, %xmm0
; SSE41-NEXT: pmovsxbq %xmm0, %xmm3
; SSE41-NEXT: movdqa %xmm4, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_16i8_to_8i64:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovsxbq %xmm0, %xmm1
; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2
; AVX1-NEXT: vpmovsxbq %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3]
; AVX1-NEXT: vpmovsxbq %xmm1, %xmm1
; AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxbq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_16i8_to_8i64:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovsxbq %xmm0, %ymm2
; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; AVX2-NEXT: vpmovsxbq %xmm0, %ymm1
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: sext_16i8_to_8i64:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovsxbq %xmm0, %zmm0
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: sext_16i8_to_8i64:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm4
; X32-SSE41-NEXT: movdqa %xmm0, %xmm1
; X32-SSE41-NEXT: psrld $16, %xmm1
; X32-SSE41-NEXT: pmovsxbq %xmm1, %xmm1
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3]
; X32-SSE41-NEXT: pmovsxbq %xmm2, %xmm2
; X32-SSE41-NEXT: psrlq $48, %xmm0
; X32-SSE41-NEXT: pmovsxbq %xmm0, %xmm3
; X32-SSE41-NEXT: movdqa %xmm4, %xmm0
; X32-SSE41-NEXT: retl
entry:
  %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %C = sext <8 x i8> %B to <8 x i64>
  ret <8 x i64> %C
}

define <4 x i32> @sext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_8i16_to_4i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_8i16_to_4i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: psrad $16, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_8i16_to_4i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxwd %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sext_8i16_to_4i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX-NEXT: retq
;
; X32-SSE41-LABEL: sext_8i16_to_4i32:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm0
; X32-SSE41-NEXT: retl
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %C = sext <4 x i16> %B to <4 x i32>
  ret <4 x i32> %C
}

define <8 x i32> @sext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_8i16_to_8i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_8i16_to_8i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT: psrad $16, %xmm2
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSSE3-NEXT: psrad $16, %xmm1
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_8i16_to_8i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxwd %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovsxwd %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_8i16_to_8i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_8i16_to_8i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: sext_8i16_to_8i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovsxwd %xmm0, %ymm0
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: sext_8i16_to_8i32:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm2
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm1
; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
; X32-SSE41-NEXT: retl
entry:
  %B = sext <8 x i16> %A to <8 x i32>
  ret <8 x i32> %B
}

define <16 x i32> @sext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_16i16_to_16i32:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
; SSE2-NEXT: psrad $16, %xmm4
; SSE2-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
; SSE2-NEXT: psrad $16, %xmm5
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE2-NEXT: psrad $16, %xmm3
; SSE2-NEXT: movdqa %xmm4, %xmm0
; SSE2-NEXT: movdqa %xmm5, %xmm1
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_16i16_to_16i32:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3]
; SSSE3-NEXT: psrad $16, %xmm4
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm5 = xmm5[4],xmm0[4],xmm5[5],xmm0[5],xmm5[6],xmm0[6],xmm5[7],xmm0[7]
; SSSE3-NEXT: psrad $16, %xmm5
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSSE3-NEXT: psrad $16, %xmm2
; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSSE3-NEXT: psrad $16, %xmm3
; SSSE3-NEXT: movdqa %xmm4, %xmm0
; SSSE3-NEXT: movdqa %xmm5, %xmm1
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_16i16_to_16i32:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxwd %xmm0, %xmm5
; SSE41-NEXT: pmovsxwd %xmm1, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovsxwd %xmm0, %xmm4
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; SSE41-NEXT: pmovsxwd %xmm0, %xmm3
; SSE41-NEXT: movdqa %xmm5, %xmm0
; SSE41-NEXT: movdqa %xmm4, %xmm1
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_16i16_to_16i32:
; AVX1: # BB#0: # %entry
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxwd %xmm2, %xmm2
; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1
; AVX1-NEXT: vmovaps %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_16i16_to_16i32:
; AVX2: # BB#0: # %entry
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm2
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpmovsxwd %xmm0, %ymm1
; AVX2-NEXT: vmovdqa %ymm2, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: sext_16i16_to_16i32:
; AVX512: # BB#0: # %entry
; AVX512-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: sext_16i16_to_16i32:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm5
; X32-SSE41-NEXT: pmovsxwd %xmm1, %xmm2
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm4
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1]
; X32-SSE41-NEXT: pmovsxwd %xmm0, %xmm3
; X32-SSE41-NEXT: movdqa %xmm5, %xmm0
; X32-SSE41-NEXT: movdqa %xmm4, %xmm1
; X32-SSE41-NEXT: retl
entry:
  %B = sext <16 x i16> %A to <16 x i32>
  ret <16 x i32> %B
}

define <2 x i64> @sext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_8i16_to_2i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT: movdqa %xmm0, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: psrad $16, %xmm0
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_8i16_to_2i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSSE3-NEXT: movdqa %xmm0, %xmm1
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: psrad $16, %xmm0
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_8i16_to_2i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxwq %xmm0, %xmm0
; SSE41-NEXT: retq
;
; AVX-LABEL: sext_8i16_to_2i64:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovsxwq %xmm0, %xmm0
; AVX-NEXT: retq
;
; X32-SSE41-LABEL: sext_8i16_to_2i64:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm0
; X32-SSE41-NEXT: retl
entry:
  %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1>
  %C = sext <2 x i16> %B to <2 x i64>
  ret <2 x i64> %C
}

define <4 x i64> @sext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp {
; SSE2-LABEL: sext_8i16_to_4i64:
; SSE2: # BB#0: # %entry
; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT: movdqa %xmm2, %xmm1
; SSE2-NEXT: psrad $31, %xmm1
; SSE2-NEXT: psrad $16, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE2-NEXT: movdqa %xmm1, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: psrad $16, %xmm1
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSE2-NEXT: movdqa %xmm2, %xmm0
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_8i16_to_4i64:
; SSSE3: # BB#0: # %entry
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSSE3-NEXT: movdqa %xmm2, %xmm1
; SSSE3-NEXT: psrad $31, %xmm1
; SSSE3-NEXT: psrad $16, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSSE3-NEXT: movdqa %xmm1, %xmm0
; SSSE3-NEXT: psrad $31, %xmm0
; SSSE3-NEXT: psrad $16, %xmm1
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
; SSSE3-NEXT: movdqa %xmm2, %xmm0
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_8i16_to_4i64:
; SSE41: # BB#0: # %entry
; SSE41-NEXT: pmovsxwq %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3]
; SSE41-NEXT: pmovsxwq %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2,
%xmm0 870; SSE41-NEXT: retq 871; 872; AVX1-LABEL: sext_8i16_to_4i64: 873; AVX1: # BB#0: # %entry 874; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 875; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 876; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 877; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 878; AVX1-NEXT: retq 879; 880; AVX2-LABEL: sext_8i16_to_4i64: 881; AVX2: # BB#0: # %entry 882; AVX2-NEXT: vpmovsxwq %xmm0, %ymm0 883; AVX2-NEXT: retq 884; 885; AVX512-LABEL: sext_8i16_to_4i64: 886; AVX512: # BB#0: # %entry 887; AVX512-NEXT: vpmovsxwq %xmm0, %ymm0 888; AVX512-NEXT: retq 889; 890; X32-SSE41-LABEL: sext_8i16_to_4i64: 891; X32-SSE41: # BB#0: # %entry 892; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm2 893; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 894; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm1 895; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 896; X32-SSE41-NEXT: retl 897entry: 898 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 899 %C = sext <4 x i16> %B to <4 x i64> 900 ret <4 x i64> %C 901} 902 903define <8 x i64> @sext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp { 904; SSE2-LABEL: sext_8i16_to_8i64: 905; SSE2: # BB#0: # %entry 906; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 907; SSE2-NEXT: movdqa %xmm4, %xmm1 908; SSE2-NEXT: psrad $31, %xmm1 909; SSE2-NEXT: psrad $16, %xmm4 910; SSE2-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 911; SSE2-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 912; SSE2-NEXT: movdqa %xmm2, %xmm1 913; SSE2-NEXT: psrad $31, %xmm1 914; SSE2-NEXT: psrad $16, %xmm2 915; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 916; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 917; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 918; SSE2-NEXT: movdqa %xmm1, %xmm3 919; SSE2-NEXT: psrad $31, %xmm3 920; SSE2-NEXT: psrad $16, %xmm1 921; SSE2-NEXT: punpckldq {{.*#+}} 
xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 922; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 923; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] 924; SSE2-NEXT: movdqa %xmm3, %xmm0 925; SSE2-NEXT: psrad $31, %xmm0 926; SSE2-NEXT: psrad $16, %xmm3 927; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 928; SSE2-NEXT: movdqa %xmm4, %xmm0 929; SSE2-NEXT: retq 930; 931; SSSE3-LABEL: sext_8i16_to_8i64: 932; SSSE3: # BB#0: # %entry 933; SSSE3-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm0[0],xmm4[1],xmm0[1],xmm4[2],xmm0[2],xmm4[3],xmm0[3] 934; SSSE3-NEXT: movdqa %xmm4, %xmm1 935; SSSE3-NEXT: psrad $31, %xmm1 936; SSSE3-NEXT: psrad $16, %xmm4 937; SSSE3-NEXT: punpckldq {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1] 938; SSSE3-NEXT: punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 939; SSSE3-NEXT: movdqa %xmm2, %xmm1 940; SSSE3-NEXT: psrad $31, %xmm1 941; SSSE3-NEXT: psrad $16, %xmm2 942; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 943; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 944; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 945; SSSE3-NEXT: movdqa %xmm1, %xmm3 946; SSSE3-NEXT: psrad $31, %xmm3 947; SSSE3-NEXT: psrad $16, %xmm1 948; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 949; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 950; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3] 951; SSSE3-NEXT: movdqa %xmm3, %xmm0 952; SSSE3-NEXT: psrad $31, %xmm0 953; SSSE3-NEXT: psrad $16, %xmm3 954; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1] 955; SSSE3-NEXT: movdqa %xmm4, %xmm0 956; SSSE3-NEXT: retq 957; 958; SSE41-LABEL: sext_8i16_to_8i64: 959; SSE41: # BB#0: # %entry 960; SSE41-NEXT: pmovsxwq %xmm0, %xmm4 961; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 962; SSE41-NEXT: pmovsxwq %xmm1, %xmm1 963; SSE41-NEXT: 
pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 964; SSE41-NEXT: pmovsxwq %xmm2, %xmm2 965; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 966; SSE41-NEXT: pmovsxwq %xmm0, %xmm3 967; SSE41-NEXT: movdqa %xmm4, %xmm0 968; SSE41-NEXT: retq 969; 970; AVX1-LABEL: sext_8i16_to_8i64: 971; AVX1: # BB#0: # %entry 972; AVX1-NEXT: vpmovsxwq %xmm0, %xmm1 973; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] 974; AVX1-NEXT: vpmovsxwq %xmm2, %xmm2 975; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 976; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 977; AVX1-NEXT: vpmovsxwq %xmm1, %xmm1 978; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 979; AVX1-NEXT: vpmovsxwq %xmm0, %xmm0 980; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 981; AVX1-NEXT: vmovaps %ymm2, %ymm0 982; AVX1-NEXT: retq 983; 984; AVX2-LABEL: sext_8i16_to_8i64: 985; AVX2: # BB#0: # %entry 986; AVX2-NEXT: vpmovsxwq %xmm0, %ymm2 987; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 988; AVX2-NEXT: vpmovsxwq %xmm0, %ymm1 989; AVX2-NEXT: vmovdqa %ymm2, %ymm0 990; AVX2-NEXT: retq 991; 992; AVX512-LABEL: sext_8i16_to_8i64: 993; AVX512: # BB#0: # %entry 994; AVX512-NEXT: vpmovsxwq %xmm0, %zmm0 995; AVX512-NEXT: retq 996; 997; X32-SSE41-LABEL: sext_8i16_to_8i64: 998; X32-SSE41: # BB#0: # %entry 999; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm4 1000; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1001; X32-SSE41-NEXT: pmovsxwq %xmm1, %xmm1 1002; X32-SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 1003; X32-SSE41-NEXT: pmovsxwq %xmm2, %xmm2 1004; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 1005; X32-SSE41-NEXT: pmovsxwq %xmm0, %xmm3 1006; X32-SSE41-NEXT: movdqa %xmm4, %xmm0 1007; X32-SSE41-NEXT: retl 1008entry: 1009 %B = sext <8 x i16> %A to <8 x i64> 1010 ret <8 x i64> %B 1011} 1012 1013define <2 x i64> @sext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1014; SSE2-LABEL: sext_4i32_to_2i64: 1015; SSE2: # BB#0: # %entry 1016; SSE2-NEXT: movdqa %xmm0, %xmm1 1017; SSE2-NEXT: psrad $31, %xmm1 1018; SSE2-NEXT: 
punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1019; SSE2-NEXT: retq 1020; 1021; SSSE3-LABEL: sext_4i32_to_2i64: 1022; SSSE3: # BB#0: # %entry 1023; SSSE3-NEXT: movdqa %xmm0, %xmm1 1024; SSSE3-NEXT: psrad $31, %xmm1 1025; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1026; SSSE3-NEXT: retq 1027; 1028; SSE41-LABEL: sext_4i32_to_2i64: 1029; SSE41: # BB#0: # %entry 1030; SSE41-NEXT: pmovsxdq %xmm0, %xmm0 1031; SSE41-NEXT: retq 1032; 1033; AVX-LABEL: sext_4i32_to_2i64: 1034; AVX: # BB#0: # %entry 1035; AVX-NEXT: vpmovsxdq %xmm0, %xmm0 1036; AVX-NEXT: retq 1037; 1038; X32-SSE41-LABEL: sext_4i32_to_2i64: 1039; X32-SSE41: # BB#0: # %entry 1040; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm0 1041; X32-SSE41-NEXT: retl 1042entry: 1043 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 1044 %C = sext <2 x i32> %B to <2 x i64> 1045 ret <2 x i64> %C 1046} 1047 1048define <4 x i64> @sext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1049; SSE2-LABEL: sext_4i32_to_4i64: 1050; SSE2: # BB#0: # %entry 1051; SSE2-NEXT: movdqa %xmm0, %xmm2 1052; SSE2-NEXT: psrad $31, %xmm2 1053; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1054; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1055; SSE2-NEXT: movdqa %xmm1, %xmm2 1056; SSE2-NEXT: psrad $31, %xmm2 1057; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1058; SSE2-NEXT: retq 1059; 1060; SSSE3-LABEL: sext_4i32_to_4i64: 1061; SSSE3: # BB#0: # %entry 1062; SSSE3-NEXT: movdqa %xmm0, %xmm2 1063; SSSE3-NEXT: psrad $31, %xmm2 1064; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1065; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1066; SSSE3-NEXT: movdqa %xmm1, %xmm2 1067; SSSE3-NEXT: psrad $31, %xmm2 1068; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1069; SSSE3-NEXT: retq 1070; 1071; SSE41-LABEL: sext_4i32_to_4i64: 1072; SSE41: # BB#0: # %entry 1073; SSE41-NEXT: pmovsxdq %xmm0, %xmm2 
1074; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1075; SSE41-NEXT: pmovsxdq %xmm0, %xmm1 1076; SSE41-NEXT: movdqa %xmm2, %xmm0 1077; SSE41-NEXT: retq 1078; 1079; AVX1-LABEL: sext_4i32_to_4i64: 1080; AVX1: # BB#0: # %entry 1081; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1082; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1083; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1084; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1085; AVX1-NEXT: retq 1086; 1087; AVX2-LABEL: sext_4i32_to_4i64: 1088; AVX2: # BB#0: # %entry 1089; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0 1090; AVX2-NEXT: retq 1091; 1092; AVX512-LABEL: sext_4i32_to_4i64: 1093; AVX512: # BB#0: # %entry 1094; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0 1095; AVX512-NEXT: retq 1096; 1097; X32-SSE41-LABEL: sext_4i32_to_4i64: 1098; X32-SSE41: # BB#0: # %entry 1099; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2 1100; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1101; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1 1102; X32-SSE41-NEXT: movdqa %xmm2, %xmm0 1103; X32-SSE41-NEXT: retl 1104entry: 1105 %B = sext <4 x i32> %A to <4 x i64> 1106 ret <4 x i64> %B 1107} 1108 1109define <8 x i64> @sext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp { 1110; SSE2-LABEL: sext_8i32_to_8i64: 1111; SSE2: # BB#0: # %entry 1112; SSE2-NEXT: movdqa %xmm1, %xmm2 1113; SSE2-NEXT: movdqa %xmm0, %xmm3 1114; SSE2-NEXT: psrad $31, %xmm3 1115; SSE2-NEXT: movdqa %xmm2, %xmm4 1116; SSE2-NEXT: psrad $31, %xmm4 1117; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1118; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1119; SSE2-NEXT: movdqa %xmm1, %xmm3 1120; SSE2-NEXT: psrad $31, %xmm3 1121; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1122; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1] 1123; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 1124; SSE2-NEXT: movdqa %xmm3, %xmm4 1125; SSE2-NEXT: psrad $31, %xmm4 1126; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 1127; SSE2-NEXT: retq 
1128; 1129; SSSE3-LABEL: sext_8i32_to_8i64: 1130; SSSE3: # BB#0: # %entry 1131; SSSE3-NEXT: movdqa %xmm1, %xmm2 1132; SSSE3-NEXT: movdqa %xmm0, %xmm3 1133; SSSE3-NEXT: psrad $31, %xmm3 1134; SSSE3-NEXT: movdqa %xmm2, %xmm4 1135; SSSE3-NEXT: psrad $31, %xmm4 1136; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 1137; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 1138; SSSE3-NEXT: movdqa %xmm1, %xmm3 1139; SSSE3-NEXT: psrad $31, %xmm3 1140; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1] 1141; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm2[2,3,0,1] 1142; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 1143; SSSE3-NEXT: movdqa %xmm3, %xmm4 1144; SSSE3-NEXT: psrad $31, %xmm4 1145; SSSE3-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 1146; SSSE3-NEXT: retq 1147; 1148; SSE41-LABEL: sext_8i32_to_8i64: 1149; SSE41: # BB#0: # %entry 1150; SSE41-NEXT: pmovsxdq %xmm0, %xmm5 1151; SSE41-NEXT: pmovsxdq %xmm1, %xmm2 1152; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1153; SSE41-NEXT: pmovsxdq %xmm0, %xmm4 1154; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 1155; SSE41-NEXT: pmovsxdq %xmm0, %xmm3 1156; SSE41-NEXT: movdqa %xmm5, %xmm0 1157; SSE41-NEXT: movdqa %xmm4, %xmm1 1158; SSE41-NEXT: retq 1159; 1160; AVX1-LABEL: sext_8i32_to_8i64: 1161; AVX1: # BB#0: # %entry 1162; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1163; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 1164; AVX1-NEXT: vpmovsxdq %xmm2, %xmm2 1165; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 1166; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1167; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1168; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1169; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1170; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 1171; AVX1-NEXT: vmovaps %ymm2, %ymm0 1172; AVX1-NEXT: retq 1173; 1174; AVX2-LABEL: sext_8i32_to_8i64: 1175; AVX2: # BB#0: # %entry 1176; AVX2-NEXT: vpmovsxdq %xmm0, %ymm2 1177; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 
1178; AVX2-NEXT: vpmovsxdq %xmm0, %ymm1 1179; AVX2-NEXT: vmovdqa %ymm2, %ymm0 1180; AVX2-NEXT: retq 1181; 1182; AVX512-LABEL: sext_8i32_to_8i64: 1183; AVX512: # BB#0: # %entry 1184; AVX512-NEXT: vpmovsxdq %ymm0, %zmm0 1185; AVX512-NEXT: retq 1186; 1187; X32-SSE41-LABEL: sext_8i32_to_8i64: 1188; X32-SSE41: # BB#0: # %entry 1189; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm5 1190; X32-SSE41-NEXT: pmovsxdq %xmm1, %xmm2 1191; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1192; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm4 1193; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,3,0,1] 1194; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm3 1195; X32-SSE41-NEXT: movdqa %xmm5, %xmm0 1196; X32-SSE41-NEXT: movdqa %xmm4, %xmm1 1197; X32-SSE41-NEXT: retl 1198entry: 1199 %B = sext <8 x i32> %A to <8 x i64> 1200 ret <8 x i64> %B 1201} 1202 1203define <2 x i64> @load_sext_2i1_to_2i64(<2 x i1> *%ptr) { 1204; SSE-LABEL: load_sext_2i1_to_2i64: 1205; SSE: # BB#0: # %entry 1206; SSE-NEXT: movzbl (%rdi), %eax 1207; SSE-NEXT: movq %rax, %rcx 1208; SSE-NEXT: shlq $62, %rcx 1209; SSE-NEXT: sarq $63, %rcx 1210; SSE-NEXT: movd %rcx, %xmm1 1211; SSE-NEXT: shlq $63, %rax 1212; SSE-NEXT: sarq $63, %rax 1213; SSE-NEXT: movd %rax, %xmm0 1214; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1215; SSE-NEXT: retq 1216; 1217; AVX1-LABEL: load_sext_2i1_to_2i64: 1218; AVX1: # BB#0: # %entry 1219; AVX1-NEXT: movzbl (%rdi), %eax 1220; AVX1-NEXT: movq %rax, %rcx 1221; AVX1-NEXT: shlq $62, %rcx 1222; AVX1-NEXT: sarq $63, %rcx 1223; AVX1-NEXT: vmovq %rcx, %xmm0 1224; AVX1-NEXT: shlq $63, %rax 1225; AVX1-NEXT: sarq $63, %rax 1226; AVX1-NEXT: vmovq %rax, %xmm1 1227; AVX1-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1228; AVX1-NEXT: retq 1229; 1230; AVX2-LABEL: load_sext_2i1_to_2i64: 1231; AVX2: # BB#0: # %entry 1232; AVX2-NEXT: movzbl (%rdi), %eax 1233; AVX2-NEXT: movq %rax, %rcx 1234; AVX2-NEXT: shlq $62, %rcx 1235; AVX2-NEXT: sarq $63, %rcx 1236; AVX2-NEXT: vmovq %rcx, %xmm0 1237; AVX2-NEXT: shlq $63, %rax 1238; 
AVX2-NEXT: sarq $63, %rax 1239; AVX2-NEXT: vmovq %rax, %xmm1 1240; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1241; AVX2-NEXT: retq 1242; 1243; AVX512F-LABEL: load_sext_2i1_to_2i64: 1244; AVX512F: # BB#0: # %entry 1245; AVX512F-NEXT: movzbl (%rdi), %eax 1246; AVX512F-NEXT: kmovw %eax, %k1 1247; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1248; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> 1249; AVX512F-NEXT: retq 1250; 1251; AVX512BW-LABEL: load_sext_2i1_to_2i64: 1252; AVX512BW: # BB#0: # %entry 1253; AVX512BW-NEXT: movzbl (%rdi), %eax 1254; AVX512BW-NEXT: kmovd %eax, %k1 1255; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1256; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<kill> 1257; AVX512BW-NEXT: retq 1258; 1259; X32-SSE41-LABEL: load_sext_2i1_to_2i64: 1260; X32-SSE41: # BB#0: # %entry 1261; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1262; X32-SSE41-NEXT: movzbl (%eax), %eax 1263; X32-SSE41-NEXT: movl %eax, %ecx 1264; X32-SSE41-NEXT: shll $31, %ecx 1265; X32-SSE41-NEXT: sarl $31, %ecx 1266; X32-SSE41-NEXT: movd %ecx, %xmm0 1267; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 1268; X32-SSE41-NEXT: shll $30, %eax 1269; X32-SSE41-NEXT: sarl $31, %eax 1270; X32-SSE41-NEXT: pinsrd $2, %eax, %xmm0 1271; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0 1272; X32-SSE41-NEXT: retl 1273entry: 1274 %X = load <2 x i1>, <2 x i1>* %ptr 1275 %Y = sext <2 x i1> %X to <2 x i64> 1276 ret <2 x i64> %Y 1277} 1278 1279define <2 x i64> @load_sext_2i8_to_2i64(<2 x i8> *%ptr) { 1280; SSE2-LABEL: load_sext_2i8_to_2i64: 1281; SSE2: # BB#0: # %entry 1282; SSE2-NEXT: movzwl (%rdi), %eax 1283; SSE2-NEXT: movd %eax, %xmm0 1284; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1285; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1286; SSE2-NEXT: movdqa %xmm0, %xmm1 1287; SSE2-NEXT: psrad $31, %xmm1 1288; SSE2-NEXT: psrad $24, %xmm0 1289; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 
1290; SSE2-NEXT: retq 1291; 1292; SSSE3-LABEL: load_sext_2i8_to_2i64: 1293; SSSE3: # BB#0: # %entry 1294; SSSE3-NEXT: movzwl (%rdi), %eax 1295; SSSE3-NEXT: movd %eax, %xmm0 1296; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1297; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1298; SSSE3-NEXT: movdqa %xmm0, %xmm1 1299; SSSE3-NEXT: psrad $31, %xmm1 1300; SSSE3-NEXT: psrad $24, %xmm0 1301; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1302; SSSE3-NEXT: retq 1303; 1304; SSE41-LABEL: load_sext_2i8_to_2i64: 1305; SSE41: # BB#0: # %entry 1306; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 1307; SSE41-NEXT: retq 1308; 1309; AVX-LABEL: load_sext_2i8_to_2i64: 1310; AVX: # BB#0: # %entry 1311; AVX-NEXT: vpmovsxbq (%rdi), %xmm0 1312; AVX-NEXT: retq 1313; 1314; X32-SSE41-LABEL: load_sext_2i8_to_2i64: 1315; X32-SSE41: # BB#0: # %entry 1316; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1317; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0 1318; X32-SSE41-NEXT: retl 1319entry: 1320 %X = load <2 x i8>, <2 x i8>* %ptr 1321 %Y = sext <2 x i8> %X to <2 x i64> 1322 ret <2 x i64> %Y 1323} 1324 1325define <4 x i32> @load_sext_4i1_to_4i32(<4 x i1> *%ptr) { 1326; SSE2-LABEL: load_sext_4i1_to_4i32: 1327; SSE2: # BB#0: # %entry 1328; SSE2-NEXT: movzbl (%rdi), %eax 1329; SSE2-NEXT: movq %rax, %rcx 1330; SSE2-NEXT: shlq $60, %rcx 1331; SSE2-NEXT: sarq $63, %rcx 1332; SSE2-NEXT: movd %ecx, %xmm0 1333; SSE2-NEXT: movq %rax, %rcx 1334; SSE2-NEXT: shlq $62, %rcx 1335; SSE2-NEXT: sarq $63, %rcx 1336; SSE2-NEXT: movd %ecx, %xmm1 1337; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1338; SSE2-NEXT: movq %rax, %rcx 1339; SSE2-NEXT: shlq $61, %rcx 1340; SSE2-NEXT: sarq $63, %rcx 1341; SSE2-NEXT: movd %ecx, %xmm2 1342; SSE2-NEXT: shlq $63, %rax 1343; SSE2-NEXT: sarq $63, %rax 1344; SSE2-NEXT: movd %eax, %xmm0 1345; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1346; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = 
xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1347; SSE2-NEXT: retq 1348; 1349; SSSE3-LABEL: load_sext_4i1_to_4i32: 1350; SSSE3: # BB#0: # %entry 1351; SSSE3-NEXT: movzbl (%rdi), %eax 1352; SSSE3-NEXT: movq %rax, %rcx 1353; SSSE3-NEXT: shlq $60, %rcx 1354; SSSE3-NEXT: sarq $63, %rcx 1355; SSSE3-NEXT: movd %ecx, %xmm0 1356; SSSE3-NEXT: movq %rax, %rcx 1357; SSSE3-NEXT: shlq $62, %rcx 1358; SSSE3-NEXT: sarq $63, %rcx 1359; SSSE3-NEXT: movd %ecx, %xmm1 1360; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1361; SSSE3-NEXT: movq %rax, %rcx 1362; SSSE3-NEXT: shlq $61, %rcx 1363; SSSE3-NEXT: sarq $63, %rcx 1364; SSSE3-NEXT: movd %ecx, %xmm2 1365; SSSE3-NEXT: shlq $63, %rax 1366; SSSE3-NEXT: sarq $63, %rax 1367; SSSE3-NEXT: movd %eax, %xmm0 1368; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1369; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1370; SSSE3-NEXT: retq 1371; 1372; SSE41-LABEL: load_sext_4i1_to_4i32: 1373; SSE41: # BB#0: # %entry 1374; SSE41-NEXT: movzbl (%rdi), %eax 1375; SSE41-NEXT: movq %rax, %rcx 1376; SSE41-NEXT: shlq $62, %rcx 1377; SSE41-NEXT: sarq $63, %rcx 1378; SSE41-NEXT: movq %rax, %rdx 1379; SSE41-NEXT: shlq $63, %rdx 1380; SSE41-NEXT: sarq $63, %rdx 1381; SSE41-NEXT: movd %edx, %xmm0 1382; SSE41-NEXT: pinsrd $1, %ecx, %xmm0 1383; SSE41-NEXT: movq %rax, %rcx 1384; SSE41-NEXT: shlq $61, %rcx 1385; SSE41-NEXT: sarq $63, %rcx 1386; SSE41-NEXT: pinsrd $2, %ecx, %xmm0 1387; SSE41-NEXT: shlq $60, %rax 1388; SSE41-NEXT: sarq $63, %rax 1389; SSE41-NEXT: pinsrd $3, %eax, %xmm0 1390; SSE41-NEXT: retq 1391; 1392; AVX1-LABEL: load_sext_4i1_to_4i32: 1393; AVX1: # BB#0: # %entry 1394; AVX1-NEXT: movzbl (%rdi), %eax 1395; AVX1-NEXT: movq %rax, %rcx 1396; AVX1-NEXT: shlq $62, %rcx 1397; AVX1-NEXT: sarq $63, %rcx 1398; AVX1-NEXT: movq %rax, %rdx 1399; AVX1-NEXT: shlq $63, %rdx 1400; AVX1-NEXT: sarq $63, %rdx 1401; AVX1-NEXT: vmovd %edx, %xmm0 1402; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1403; AVX1-NEXT: 
movq %rax, %rcx 1404; AVX1-NEXT: shlq $61, %rcx 1405; AVX1-NEXT: sarq $63, %rcx 1406; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 1407; AVX1-NEXT: shlq $60, %rax 1408; AVX1-NEXT: sarq $63, %rax 1409; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1410; AVX1-NEXT: retq 1411; 1412; AVX2-LABEL: load_sext_4i1_to_4i32: 1413; AVX2: # BB#0: # %entry 1414; AVX2-NEXT: movzbl (%rdi), %eax 1415; AVX2-NEXT: movq %rax, %rcx 1416; AVX2-NEXT: shlq $62, %rcx 1417; AVX2-NEXT: sarq $63, %rcx 1418; AVX2-NEXT: movq %rax, %rdx 1419; AVX2-NEXT: shlq $63, %rdx 1420; AVX2-NEXT: sarq $63, %rdx 1421; AVX2-NEXT: vmovd %edx, %xmm0 1422; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1423; AVX2-NEXT: movq %rax, %rcx 1424; AVX2-NEXT: shlq $61, %rcx 1425; AVX2-NEXT: sarq $63, %rcx 1426; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 1427; AVX2-NEXT: shlq $60, %rax 1428; AVX2-NEXT: sarq $63, %rax 1429; AVX2-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1430; AVX2-NEXT: retq 1431; 1432; AVX512F-LABEL: load_sext_4i1_to_4i32: 1433; AVX512F: # BB#0: # %entry 1434; AVX512F-NEXT: movzbl (%rdi), %eax 1435; AVX512F-NEXT: kmovw %eax, %k1 1436; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1437; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 1438; AVX512F-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 1439; AVX512F-NEXT: retq 1440; 1441; AVX512BW-LABEL: load_sext_4i1_to_4i32: 1442; AVX512BW: # BB#0: # %entry 1443; AVX512BW-NEXT: movzbl (%rdi), %eax 1444; AVX512BW-NEXT: kmovd %eax, %k1 1445; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1446; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 1447; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %YMM0<kill> 1448; AVX512BW-NEXT: retq 1449; 1450; X32-SSE41-LABEL: load_sext_4i1_to_4i32: 1451; X32-SSE41: # BB#0: # %entry 1452; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1453; X32-SSE41-NEXT: movl (%eax), %eax 1454; X32-SSE41-NEXT: movl %eax, %ecx 1455; X32-SSE41-NEXT: shll $30, %ecx 1456; X32-SSE41-NEXT: sarl $31, %ecx 1457; X32-SSE41-NEXT: movl %eax, %edx 1458; X32-SSE41-NEXT: shll 
$31, %edx 1459; X32-SSE41-NEXT: sarl $31, %edx 1460; X32-SSE41-NEXT: movd %edx, %xmm0 1461; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm0 1462; X32-SSE41-NEXT: movl %eax, %ecx 1463; X32-SSE41-NEXT: shll $29, %ecx 1464; X32-SSE41-NEXT: sarl $31, %ecx 1465; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm0 1466; X32-SSE41-NEXT: shll $28, %eax 1467; X32-SSE41-NEXT: sarl $31, %eax 1468; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm0 1469; X32-SSE41-NEXT: retl 1470entry: 1471 %X = load <4 x i1>, <4 x i1>* %ptr 1472 %Y = sext <4 x i1> %X to <4 x i32> 1473 ret <4 x i32> %Y 1474} 1475 1476define <4 x i32> @load_sext_4i8_to_4i32(<4 x i8> *%ptr) { 1477; SSE2-LABEL: load_sext_4i8_to_4i32: 1478; SSE2: # BB#0: # %entry 1479; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1480; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1481; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1482; SSE2-NEXT: psrad $24, %xmm0 1483; SSE2-NEXT: retq 1484; 1485; SSSE3-LABEL: load_sext_4i8_to_4i32: 1486; SSSE3: # BB#0: # %entry 1487; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1488; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 1489; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 1490; SSSE3-NEXT: psrad $24, %xmm0 1491; SSSE3-NEXT: retq 1492; 1493; SSE41-LABEL: load_sext_4i8_to_4i32: 1494; SSE41: # BB#0: # %entry 1495; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 1496; SSE41-NEXT: retq 1497; 1498; AVX-LABEL: load_sext_4i8_to_4i32: 1499; AVX: # BB#0: # %entry 1500; AVX-NEXT: vpmovsxbd (%rdi), %xmm0 1501; AVX-NEXT: retq 1502; 1503; X32-SSE41-LABEL: load_sext_4i8_to_4i32: 1504; X32-SSE41: # BB#0: # %entry 1505; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1506; X32-SSE41-NEXT: pmovsxbd (%eax), %xmm0 1507; X32-SSE41-NEXT: retl 1508entry: 1509 %X = load <4 x i8>, <4 x i8>* %ptr 1510 %Y = sext <4 x i8> %X to <4 x i32> 1511 ret <4 x i32> %Y 1512} 1513 1514define <4 x i64> @load_sext_4i1_to_4i64(<4 x i1> *%ptr) { 1515; SSE2-LABEL: 
load_sext_4i1_to_4i64: 1516; SSE2: # BB#0: # %entry 1517; SSE2-NEXT: movl (%rdi), %eax 1518; SSE2-NEXT: movl %eax, %ecx 1519; SSE2-NEXT: shrl $3, %ecx 1520; SSE2-NEXT: movd %ecx, %xmm0 1521; SSE2-NEXT: movl %eax, %ecx 1522; SSE2-NEXT: shrl %ecx 1523; SSE2-NEXT: movd %ecx, %xmm1 1524; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1525; SSE2-NEXT: movd %eax, %xmm2 1526; SSE2-NEXT: shrl $2, %eax 1527; SSE2-NEXT: movd %eax, %xmm0 1528; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1529; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1530; SSE2-NEXT: pand {{.*}}(%rip), %xmm2 1531; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] 1532; SSE2-NEXT: psllq $63, %xmm0 1533; SSE2-NEXT: psrad $31, %xmm0 1534; SSE2-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1535; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3] 1536; SSE2-NEXT: psllq $63, %xmm1 1537; SSE2-NEXT: psrad $31, %xmm1 1538; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1539; SSE2-NEXT: retq 1540; 1541; SSSE3-LABEL: load_sext_4i1_to_4i64: 1542; SSSE3: # BB#0: # %entry 1543; SSSE3-NEXT: movl (%rdi), %eax 1544; SSSE3-NEXT: movl %eax, %ecx 1545; SSSE3-NEXT: shrl $3, %ecx 1546; SSSE3-NEXT: movd %ecx, %xmm0 1547; SSSE3-NEXT: movl %eax, %ecx 1548; SSSE3-NEXT: shrl %ecx 1549; SSSE3-NEXT: movd %ecx, %xmm1 1550; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1] 1551; SSSE3-NEXT: movd %eax, %xmm2 1552; SSSE3-NEXT: shrl $2, %eax 1553; SSSE3-NEXT: movd %eax, %xmm0 1554; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1] 1555; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1] 1556; SSSE3-NEXT: pand {{.*}}(%rip), %xmm2 1557; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm2[0,1,1,3] 1558; SSSE3-NEXT: psllq $63, %xmm0 1559; SSSE3-NEXT: psrad $31, %xmm0 1560; SSSE3-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1561; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm2[2,1,3,3] 1562; SSSE3-NEXT: psllq $63, %xmm1 1563; SSSE3-NEXT: psrad 
$31, %xmm1 1564; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1565; SSSE3-NEXT: retq 1566; 1567; SSE41-LABEL: load_sext_4i1_to_4i64: 1568; SSE41: # BB#0: # %entry 1569; SSE41-NEXT: movl (%rdi), %eax 1570; SSE41-NEXT: movl %eax, %ecx 1571; SSE41-NEXT: shrl %ecx 1572; SSE41-NEXT: movd %eax, %xmm1 1573; SSE41-NEXT: pinsrd $1, %ecx, %xmm1 1574; SSE41-NEXT: movl %eax, %ecx 1575; SSE41-NEXT: shrl $2, %ecx 1576; SSE41-NEXT: pinsrd $2, %ecx, %xmm1 1577; SSE41-NEXT: shrl $3, %eax 1578; SSE41-NEXT: pinsrd $3, %eax, %xmm1 1579; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 1580; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1581; SSE41-NEXT: psllq $63, %xmm0 1582; SSE41-NEXT: psrad $31, %xmm0 1583; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1584; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1585; SSE41-NEXT: psllq $63, %xmm1 1586; SSE41-NEXT: psrad $31, %xmm1 1587; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1588; SSE41-NEXT: retq 1589; 1590; AVX1-LABEL: load_sext_4i1_to_4i64: 1591; AVX1: # BB#0: # %entry 1592; AVX1-NEXT: movzbl (%rdi), %eax 1593; AVX1-NEXT: movq %rax, %rcx 1594; AVX1-NEXT: shlq $62, %rcx 1595; AVX1-NEXT: sarq $63, %rcx 1596; AVX1-NEXT: movq %rax, %rdx 1597; AVX1-NEXT: shlq $63, %rdx 1598; AVX1-NEXT: sarq $63, %rdx 1599; AVX1-NEXT: vmovd %edx, %xmm0 1600; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 1601; AVX1-NEXT: movq %rax, %rcx 1602; AVX1-NEXT: shlq $61, %rcx 1603; AVX1-NEXT: sarq $63, %rcx 1604; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 1605; AVX1-NEXT: shlq $60, %rax 1606; AVX1-NEXT: sarq $63, %rax 1607; AVX1-NEXT: vpinsrd $3, %eax, %xmm0, %xmm0 1608; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1609; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1610; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1611; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1612; AVX1-NEXT: retq 1613; 1614; AVX2-LABEL: load_sext_4i1_to_4i64: 1615; AVX2: # BB#0: # %entry 1616; AVX2-NEXT: movzbl (%rdi), %eax 1617; AVX2-NEXT: movq %rax, %rcx 1618; AVX2-NEXT: shlq $60, %rcx 
1619; AVX2-NEXT: sarq $63, %rcx 1620; AVX2-NEXT: vmovq %rcx, %xmm0 1621; AVX2-NEXT: movq %rax, %rcx 1622; AVX2-NEXT: shlq $61, %rcx 1623; AVX2-NEXT: sarq $63, %rcx 1624; AVX2-NEXT: vmovq %rcx, %xmm1 1625; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm0 = xmm1[0],xmm0[0] 1626; AVX2-NEXT: movq %rax, %rcx 1627; AVX2-NEXT: shlq $62, %rcx 1628; AVX2-NEXT: sarq $63, %rcx 1629; AVX2-NEXT: vmovq %rcx, %xmm1 1630; AVX2-NEXT: shlq $63, %rax 1631; AVX2-NEXT: sarq $63, %rax 1632; AVX2-NEXT: vmovq %rax, %xmm2 1633; AVX2-NEXT: vpunpcklqdq {{.*#+}} xmm1 = xmm2[0],xmm1[0] 1634; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1635; AVX2-NEXT: retq 1636; 1637; AVX512F-LABEL: load_sext_4i1_to_4i64: 1638; AVX512F: # BB#0: # %entry 1639; AVX512F-NEXT: movzbl (%rdi), %eax 1640; AVX512F-NEXT: kmovw %eax, %k1 1641; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1642; AVX512F-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> 1643; AVX512F-NEXT: retq 1644; 1645; AVX512BW-LABEL: load_sext_4i1_to_4i64: 1646; AVX512BW: # BB#0: # %entry 1647; AVX512BW-NEXT: movzbl (%rdi), %eax 1648; AVX512BW-NEXT: kmovd %eax, %k1 1649; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1650; AVX512BW-NEXT: # kill: %YMM0<def> %YMM0<kill> %ZMM0<kill> 1651; AVX512BW-NEXT: retq 1652; 1653; X32-SSE41-LABEL: load_sext_4i1_to_4i64: 1654; X32-SSE41: # BB#0: # %entry 1655; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1656; X32-SSE41-NEXT: movzbl (%eax), %eax 1657; X32-SSE41-NEXT: movl %eax, %ecx 1658; X32-SSE41-NEXT: shrl %ecx 1659; X32-SSE41-NEXT: movd %eax, %xmm1 1660; X32-SSE41-NEXT: pinsrd $1, %ecx, %xmm1 1661; X32-SSE41-NEXT: movl %eax, %ecx 1662; X32-SSE41-NEXT: shrl $2, %ecx 1663; X32-SSE41-NEXT: pinsrd $2, %ecx, %xmm1 1664; X32-SSE41-NEXT: shrl $3, %eax 1665; X32-SSE41-NEXT: pinsrd $3, %eax, %xmm1 1666; X32-SSE41-NEXT: pand {{\.LCPI.*}}, %xmm1 1667; X32-SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1668; X32-SSE41-NEXT: psllq $63, %xmm0 1669; X32-SSE41-NEXT: psrad $31, %xmm0 1670; 
X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,3,3] 1671; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[2,2,3,3] 1672; X32-SSE41-NEXT: psllq $63, %xmm1 1673; X32-SSE41-NEXT: psrad $31, %xmm1 1674; X32-SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,3,3] 1675; X32-SSE41-NEXT: retl 1676entry: 1677 %X = load <4 x i1>, <4 x i1>* %ptr 1678 %Y = sext <4 x i1> %X to <4 x i64> 1679 ret <4 x i64> %Y 1680} 1681 1682define <4 x i64> @load_sext_4i8_to_4i64(<4 x i8> *%ptr) { 1683; SSE2-LABEL: load_sext_4i8_to_4i64: 1684; SSE2: # BB#0: # %entry 1685; SSE2-NEXT: movsbq 1(%rdi), %rax 1686; SSE2-NEXT: movd %rax, %xmm1 1687; SSE2-NEXT: movsbq (%rdi), %rax 1688; SSE2-NEXT: movd %rax, %xmm0 1689; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1690; SSE2-NEXT: movsbq 3(%rdi), %rax 1691; SSE2-NEXT: movd %rax, %xmm2 1692; SSE2-NEXT: movsbq 2(%rdi), %rax 1693; SSE2-NEXT: movd %rax, %xmm1 1694; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1695; SSE2-NEXT: retq 1696; 1697; SSSE3-LABEL: load_sext_4i8_to_4i64: 1698; SSSE3: # BB#0: # %entry 1699; SSSE3-NEXT: movsbq 1(%rdi), %rax 1700; SSSE3-NEXT: movd %rax, %xmm1 1701; SSSE3-NEXT: movsbq (%rdi), %rax 1702; SSSE3-NEXT: movd %rax, %xmm0 1703; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 1704; SSSE3-NEXT: movsbq 3(%rdi), %rax 1705; SSSE3-NEXT: movd %rax, %xmm2 1706; SSSE3-NEXT: movsbq 2(%rdi), %rax 1707; SSSE3-NEXT: movd %rax, %xmm1 1708; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 1709; SSSE3-NEXT: retq 1710; 1711; SSE41-LABEL: load_sext_4i8_to_4i64: 1712; SSE41: # BB#0: # %entry 1713; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 1714; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1 1715; SSE41-NEXT: retq 1716; 1717; AVX1-LABEL: load_sext_4i8_to_4i64: 1718; AVX1: # BB#0: # %entry 1719; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0 1720; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 1721; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1722; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 1723; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1724; AVX1-NEXT: retq 1725; 
1726; AVX2-LABEL: load_sext_4i8_to_4i64: 1727; AVX2: # BB#0: # %entry 1728; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 1729; AVX2-NEXT: retq 1730; 1731; AVX512-LABEL: load_sext_4i8_to_4i64: 1732; AVX512: # BB#0: # %entry 1733; AVX512-NEXT: vpmovsxbq (%rdi), %ymm0 1734; AVX512-NEXT: retq 1735; 1736; X32-SSE41-LABEL: load_sext_4i8_to_4i64: 1737; X32-SSE41: # BB#0: # %entry 1738; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1739; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0 1740; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1 1741; X32-SSE41-NEXT: retl 1742entry: 1743 %X = load <4 x i8>, <4 x i8>* %ptr 1744 %Y = sext <4 x i8> %X to <4 x i64> 1745 ret <4 x i64> %Y 1746} 1747 1748define <8 x i16> @load_sext_8i1_to_8i16(<8 x i1> *%ptr) { 1749; SSE2-LABEL: load_sext_8i1_to_8i16: 1750; SSE2: # BB#0: # %entry 1751; SSE2-NEXT: movsbq (%rdi), %rax 1752; SSE2-NEXT: movq %rax, %rcx 1753; SSE2-NEXT: shrq $7, %rcx 1754; SSE2-NEXT: movd %ecx, %xmm0 1755; SSE2-NEXT: movq %rax, %rcx 1756; SSE2-NEXT: shlq $60, %rcx 1757; SSE2-NEXT: sarq $63, %rcx 1758; SSE2-NEXT: movd %ecx, %xmm2 1759; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1760; SSE2-NEXT: movq %rax, %rcx 1761; SSE2-NEXT: shlq $58, %rcx 1762; SSE2-NEXT: sarq $63, %rcx 1763; SSE2-NEXT: movd %ecx, %xmm0 1764; SSE2-NEXT: movq %rax, %rcx 1765; SSE2-NEXT: shlq $62, %rcx 1766; SSE2-NEXT: sarq $63, %rcx 1767; SSE2-NEXT: movd %ecx, %xmm1 1768; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1769; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1770; SSE2-NEXT: movq %rax, %rcx 1771; SSE2-NEXT: shlq $57, %rcx 1772; SSE2-NEXT: sarq $63, %rcx 1773; SSE2-NEXT: movd %ecx, %xmm0 1774; SSE2-NEXT: movq %rax, %rcx 1775; SSE2-NEXT: shlq $61, %rcx 1776; SSE2-NEXT: sarq $63, %rcx 1777; SSE2-NEXT: movd %ecx, %xmm2 1778; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = 
xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1779; SSE2-NEXT: movq %rax, %rcx 1780; SSE2-NEXT: shlq $59, %rcx 1781; SSE2-NEXT: sarq $63, %rcx 1782; SSE2-NEXT: movd %ecx, %xmm3 1783; SSE2-NEXT: shlq $63, %rax 1784; SSE2-NEXT: sarq $63, %rax 1785; SSE2-NEXT: movd %eax, %xmm0 1786; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 1787; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1788; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1789; SSE2-NEXT: retq 1790; 1791; SSSE3-LABEL: load_sext_8i1_to_8i16: 1792; SSSE3: # BB#0: # %entry 1793; SSSE3-NEXT: movsbq (%rdi), %rax 1794; SSSE3-NEXT: movq %rax, %rcx 1795; SSSE3-NEXT: shrq $7, %rcx 1796; SSSE3-NEXT: movd %ecx, %xmm0 1797; SSSE3-NEXT: movq %rax, %rcx 1798; SSSE3-NEXT: shlq $60, %rcx 1799; SSSE3-NEXT: sarq $63, %rcx 1800; SSSE3-NEXT: movd %ecx, %xmm2 1801; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1802; SSSE3-NEXT: movq %rax, %rcx 1803; SSSE3-NEXT: shlq $58, %rcx 1804; SSSE3-NEXT: sarq $63, %rcx 1805; SSSE3-NEXT: movd %ecx, %xmm0 1806; SSSE3-NEXT: movq %rax, %rcx 1807; SSSE3-NEXT: shlq $62, %rcx 1808; SSSE3-NEXT: sarq $63, %rcx 1809; SSSE3-NEXT: movd %ecx, %xmm1 1810; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 1811; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1812; SSSE3-NEXT: movq %rax, %rcx 1813; SSSE3-NEXT: shlq $57, %rcx 1814; SSSE3-NEXT: sarq $63, %rcx 1815; SSSE3-NEXT: movd %ecx, %xmm0 1816; SSSE3-NEXT: movq %rax, %rcx 1817; SSSE3-NEXT: shlq $61, %rcx 1818; SSSE3-NEXT: sarq $63, %rcx 1819; SSSE3-NEXT: movd %ecx, %xmm2 1820; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 1821; SSSE3-NEXT: movq %rax, %rcx 1822; 
SSSE3-NEXT: shlq $59, %rcx 1823; SSSE3-NEXT: sarq $63, %rcx 1824; SSSE3-NEXT: movd %ecx, %xmm3 1825; SSSE3-NEXT: shlq $63, %rax 1826; SSSE3-NEXT: sarq $63, %rax 1827; SSSE3-NEXT: movd %eax, %xmm0 1828; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 1829; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1830; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1831; SSSE3-NEXT: retq 1832; 1833; SSE41-LABEL: load_sext_8i1_to_8i16: 1834; SSE41: # BB#0: # %entry 1835; SSE41-NEXT: movsbq (%rdi), %rax 1836; SSE41-NEXT: movq %rax, %rcx 1837; SSE41-NEXT: shlq $62, %rcx 1838; SSE41-NEXT: sarq $63, %rcx 1839; SSE41-NEXT: movq %rax, %rdx 1840; SSE41-NEXT: shlq $63, %rdx 1841; SSE41-NEXT: sarq $63, %rdx 1842; SSE41-NEXT: movd %edx, %xmm0 1843; SSE41-NEXT: pinsrw $1, %ecx, %xmm0 1844; SSE41-NEXT: movq %rax, %rcx 1845; SSE41-NEXT: shlq $61, %rcx 1846; SSE41-NEXT: sarq $63, %rcx 1847; SSE41-NEXT: pinsrw $2, %ecx, %xmm0 1848; SSE41-NEXT: movq %rax, %rcx 1849; SSE41-NEXT: shlq $60, %rcx 1850; SSE41-NEXT: sarq $63, %rcx 1851; SSE41-NEXT: pinsrw $3, %ecx, %xmm0 1852; SSE41-NEXT: movq %rax, %rcx 1853; SSE41-NEXT: shlq $59, %rcx 1854; SSE41-NEXT: sarq $63, %rcx 1855; SSE41-NEXT: pinsrw $4, %ecx, %xmm0 1856; SSE41-NEXT: movq %rax, %rcx 1857; SSE41-NEXT: shlq $58, %rcx 1858; SSE41-NEXT: sarq $63, %rcx 1859; SSE41-NEXT: pinsrw $5, %ecx, %xmm0 1860; SSE41-NEXT: movq %rax, %rcx 1861; SSE41-NEXT: shlq $57, %rcx 1862; SSE41-NEXT: sarq $63, %rcx 1863; SSE41-NEXT: pinsrw $6, %ecx, %xmm0 1864; SSE41-NEXT: shrq $7, %rax 1865; SSE41-NEXT: pinsrw $7, %eax, %xmm0 1866; SSE41-NEXT: retq 1867; 1868; AVX1-LABEL: load_sext_8i1_to_8i16: 1869; AVX1: # BB#0: # %entry 1870; AVX1-NEXT: movsbq (%rdi), %rax 1871; AVX1-NEXT: movq %rax, %rcx 1872; AVX1-NEXT: shlq $62, %rcx 1873; AVX1-NEXT: sarq $63, %rcx 1874; AVX1-NEXT: movq %rax, %rdx 1875; AVX1-NEXT: 
shlq $63, %rdx 1876; AVX1-NEXT: sarq $63, %rdx 1877; AVX1-NEXT: vmovd %edx, %xmm0 1878; AVX1-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 1879; AVX1-NEXT: movq %rax, %rcx 1880; AVX1-NEXT: shlq $61, %rcx 1881; AVX1-NEXT: sarq $63, %rcx 1882; AVX1-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 1883; AVX1-NEXT: movq %rax, %rcx 1884; AVX1-NEXT: shlq $60, %rcx 1885; AVX1-NEXT: sarq $63, %rcx 1886; AVX1-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 1887; AVX1-NEXT: movq %rax, %rcx 1888; AVX1-NEXT: shlq $59, %rcx 1889; AVX1-NEXT: sarq $63, %rcx 1890; AVX1-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 1891; AVX1-NEXT: movq %rax, %rcx 1892; AVX1-NEXT: shlq $58, %rcx 1893; AVX1-NEXT: sarq $63, %rcx 1894; AVX1-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 1895; AVX1-NEXT: movq %rax, %rcx 1896; AVX1-NEXT: shlq $57, %rcx 1897; AVX1-NEXT: sarq $63, %rcx 1898; AVX1-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1899; AVX1-NEXT: shrq $7, %rax 1900; AVX1-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 1901; AVX1-NEXT: retq 1902; 1903; AVX2-LABEL: load_sext_8i1_to_8i16: 1904; AVX2: # BB#0: # %entry 1905; AVX2-NEXT: movsbq (%rdi), %rax 1906; AVX2-NEXT: movq %rax, %rcx 1907; AVX2-NEXT: shlq $62, %rcx 1908; AVX2-NEXT: sarq $63, %rcx 1909; AVX2-NEXT: movq %rax, %rdx 1910; AVX2-NEXT: shlq $63, %rdx 1911; AVX2-NEXT: sarq $63, %rdx 1912; AVX2-NEXT: vmovd %edx, %xmm0 1913; AVX2-NEXT: vpinsrw $1, %ecx, %xmm0, %xmm0 1914; AVX2-NEXT: movq %rax, %rcx 1915; AVX2-NEXT: shlq $61, %rcx 1916; AVX2-NEXT: sarq $63, %rcx 1917; AVX2-NEXT: vpinsrw $2, %ecx, %xmm0, %xmm0 1918; AVX2-NEXT: movq %rax, %rcx 1919; AVX2-NEXT: shlq $60, %rcx 1920; AVX2-NEXT: sarq $63, %rcx 1921; AVX2-NEXT: vpinsrw $3, %ecx, %xmm0, %xmm0 1922; AVX2-NEXT: movq %rax, %rcx 1923; AVX2-NEXT: shlq $59, %rcx 1924; AVX2-NEXT: sarq $63, %rcx 1925; AVX2-NEXT: vpinsrw $4, %ecx, %xmm0, %xmm0 1926; AVX2-NEXT: movq %rax, %rcx 1927; AVX2-NEXT: shlq $58, %rcx 1928; AVX2-NEXT: sarq $63, %rcx 1929; AVX2-NEXT: vpinsrw $5, %ecx, %xmm0, %xmm0 1930; AVX2-NEXT: movq %rax, %rcx 1931; AVX2-NEXT: shlq $57, %rcx 1932; 
AVX2-NEXT: sarq $63, %rcx 1933; AVX2-NEXT: vpinsrw $6, %ecx, %xmm0, %xmm0 1934; AVX2-NEXT: shrq $7, %rax 1935; AVX2-NEXT: vpinsrw $7, %eax, %xmm0, %xmm0 1936; AVX2-NEXT: retq 1937; 1938; AVX512F-LABEL: load_sext_8i1_to_8i16: 1939; AVX512F: # BB#0: # %entry 1940; AVX512F-NEXT: movzbl (%rdi), %eax 1941; AVX512F-NEXT: kmovw %eax, %k1 1942; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1943; AVX512F-NEXT: vpmovqw %zmm0, %xmm0 1944; AVX512F-NEXT: retq 1945; 1946; AVX512BW-LABEL: load_sext_8i1_to_8i16: 1947; AVX512BW: # BB#0: # %entry 1948; AVX512BW-NEXT: movzbl (%rdi), %eax 1949; AVX512BW-NEXT: kmovd %eax, %k1 1950; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 1951; AVX512BW-NEXT: vpmovqw %zmm0, %xmm0 1952; AVX512BW-NEXT: retq 1953; 1954; X32-SSE41-LABEL: load_sext_8i1_to_8i16: 1955; X32-SSE41: # BB#0: # %entry 1956; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 1957; X32-SSE41-NEXT: movsbl (%eax), %eax 1958; X32-SSE41-NEXT: movl %eax, %ecx 1959; X32-SSE41-NEXT: shll $30, %ecx 1960; X32-SSE41-NEXT: sarl $31, %ecx 1961; X32-SSE41-NEXT: movl %eax, %edx 1962; X32-SSE41-NEXT: shll $31, %edx 1963; X32-SSE41-NEXT: sarl $31, %edx 1964; X32-SSE41-NEXT: movd %edx, %xmm0 1965; X32-SSE41-NEXT: pinsrw $1, %ecx, %xmm0 1966; X32-SSE41-NEXT: movl %eax, %ecx 1967; X32-SSE41-NEXT: shll $29, %ecx 1968; X32-SSE41-NEXT: sarl $31, %ecx 1969; X32-SSE41-NEXT: pinsrw $2, %ecx, %xmm0 1970; X32-SSE41-NEXT: movl %eax, %ecx 1971; X32-SSE41-NEXT: shll $28, %ecx 1972; X32-SSE41-NEXT: sarl $31, %ecx 1973; X32-SSE41-NEXT: pinsrw $3, %ecx, %xmm0 1974; X32-SSE41-NEXT: movl %eax, %ecx 1975; X32-SSE41-NEXT: shll $27, %ecx 1976; X32-SSE41-NEXT: sarl $31, %ecx 1977; X32-SSE41-NEXT: pinsrw $4, %ecx, %xmm0 1978; X32-SSE41-NEXT: movl %eax, %ecx 1979; X32-SSE41-NEXT: shll $26, %ecx 1980; X32-SSE41-NEXT: sarl $31, %ecx 1981; X32-SSE41-NEXT: pinsrw $5, %ecx, %xmm0 1982; X32-SSE41-NEXT: movl %eax, %ecx 1983; X32-SSE41-NEXT: shll $25, %ecx 1984; X32-SSE41-NEXT: sarl $31, %ecx 1985; 
X32-SSE41-NEXT: pinsrw $6, %ecx, %xmm0 1986; X32-SSE41-NEXT: shrl $7, %eax 1987; X32-SSE41-NEXT: pinsrw $7, %eax, %xmm0 1988; X32-SSE41-NEXT: retl 1989entry: 1990 %X = load <8 x i1>, <8 x i1>* %ptr 1991 %Y = sext <8 x i1> %X to <8 x i16> 1992 ret <8 x i16> %Y 1993} 1994 1995define <8 x i16> @load_sext_8i8_to_8i16(<8 x i8> *%ptr) { 1996; SSE2-LABEL: load_sext_8i8_to_8i16: 1997; SSE2: # BB#0: # %entry 1998; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1999; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2000; SSE2-NEXT: psraw $8, %xmm0 2001; SSE2-NEXT: retq 2002; 2003; SSSE3-LABEL: load_sext_8i8_to_8i16: 2004; SSSE3: # BB#0: # %entry 2005; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 2006; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2007; SSSE3-NEXT: psraw $8, %xmm0 2008; SSSE3-NEXT: retq 2009; 2010; SSE41-LABEL: load_sext_8i8_to_8i16: 2011; SSE41: # BB#0: # %entry 2012; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 2013; SSE41-NEXT: retq 2014; 2015; AVX-LABEL: load_sext_8i8_to_8i16: 2016; AVX: # BB#0: # %entry 2017; AVX-NEXT: vpmovsxbw (%rdi), %xmm0 2018; AVX-NEXT: retq 2019; 2020; X32-SSE41-LABEL: load_sext_8i8_to_8i16: 2021; X32-SSE41: # BB#0: # %entry 2022; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2023; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0 2024; X32-SSE41-NEXT: retl 2025entry: 2026 %X = load <8 x i8>, <8 x i8>* %ptr 2027 %Y = sext <8 x i8> %X to <8 x i16> 2028 ret <8 x i16> %Y 2029} 2030 2031define <8 x i64> @load_sext_8i8_to_8i64(<8 x i8> *%ptr) { 2032; SSE2-LABEL: load_sext_8i8_to_8i64: 2033; SSE2: # BB#0: # %entry 2034; SSE2-NEXT: movsbq 1(%rdi), %rax 2035; SSE2-NEXT: movd %rax, %xmm1 2036; SSE2-NEXT: movsbq (%rdi), %rax 2037; SSE2-NEXT: movd %rax, %xmm0 2038; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2039; SSE2-NEXT: movsbq 3(%rdi), %rax 2040; SSE2-NEXT: movd %rax, %xmm2 2041; SSE2-NEXT: movsbq 2(%rdi), %rax 2042; SSE2-NEXT: movd %rax, %xmm1 2043; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = 
xmm1[0],xmm2[0] 2044; SSE2-NEXT: movsbq 5(%rdi), %rax 2045; SSE2-NEXT: movd %rax, %xmm3 2046; SSE2-NEXT: movsbq 4(%rdi), %rax 2047; SSE2-NEXT: movd %rax, %xmm2 2048; SSE2-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2049; SSE2-NEXT: movsbq 7(%rdi), %rax 2050; SSE2-NEXT: movd %rax, %xmm4 2051; SSE2-NEXT: movsbq 6(%rdi), %rax 2052; SSE2-NEXT: movd %rax, %xmm3 2053; SSE2-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] 2054; SSE2-NEXT: retq 2055; 2056; SSSE3-LABEL: load_sext_8i8_to_8i64: 2057; SSSE3: # BB#0: # %entry 2058; SSSE3-NEXT: movsbq 1(%rdi), %rax 2059; SSSE3-NEXT: movd %rax, %xmm1 2060; SSSE3-NEXT: movsbq (%rdi), %rax 2061; SSSE3-NEXT: movd %rax, %xmm0 2062; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 2063; SSSE3-NEXT: movsbq 3(%rdi), %rax 2064; SSSE3-NEXT: movd %rax, %xmm2 2065; SSSE3-NEXT: movsbq 2(%rdi), %rax 2066; SSSE3-NEXT: movd %rax, %xmm1 2067; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 2068; SSSE3-NEXT: movsbq 5(%rdi), %rax 2069; SSSE3-NEXT: movd %rax, %xmm3 2070; SSSE3-NEXT: movsbq 4(%rdi), %rax 2071; SSSE3-NEXT: movd %rax, %xmm2 2072; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm2 = xmm2[0],xmm3[0] 2073; SSSE3-NEXT: movsbq 7(%rdi), %rax 2074; SSSE3-NEXT: movd %rax, %xmm4 2075; SSSE3-NEXT: movsbq 6(%rdi), %rax 2076; SSSE3-NEXT: movd %rax, %xmm3 2077; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm3 = xmm3[0],xmm4[0] 2078; SSSE3-NEXT: retq 2079; 2080; SSE41-LABEL: load_sext_8i8_to_8i64: 2081; SSE41: # BB#0: # %entry 2082; SSE41-NEXT: pmovsxbq (%rdi), %xmm0 2083; SSE41-NEXT: pmovsxbq 2(%rdi), %xmm1 2084; SSE41-NEXT: pmovsxbq 4(%rdi), %xmm2 2085; SSE41-NEXT: pmovsxbq 6(%rdi), %xmm3 2086; SSE41-NEXT: retq 2087; 2088; AVX1-LABEL: load_sext_8i8_to_8i64: 2089; AVX1: # BB#0: # %entry 2090; AVX1-NEXT: vpmovsxbd (%rdi), %xmm0 2091; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 2092; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 2093; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 2094; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2095; AVX1-NEXT: vpmovsxbd 4(%rdi), %xmm1 
2096; AVX1-NEXT: vpmovsxdq %xmm1, %xmm2 2097; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm1[2,3,0,1] 2098; AVX1-NEXT: vpmovsxdq %xmm1, %xmm1 2099; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm2, %ymm1 2100; AVX1-NEXT: retq 2101; 2102; AVX2-LABEL: load_sext_8i8_to_8i64: 2103; AVX2: # BB#0: # %entry 2104; AVX2-NEXT: vpmovsxbq (%rdi), %ymm0 2105; AVX2-NEXT: vpmovsxbq 4(%rdi), %ymm1 2106; AVX2-NEXT: retq 2107; 2108; AVX512-LABEL: load_sext_8i8_to_8i64: 2109; AVX512: # BB#0: # %entry 2110; AVX512-NEXT: vpmovsxbq (%rdi), %zmm0 2111; AVX512-NEXT: retq 2112; 2113; X32-SSE41-LABEL: load_sext_8i8_to_8i64: 2114; X32-SSE41: # BB#0: # %entry 2115; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2116; X32-SSE41-NEXT: pmovsxbq (%eax), %xmm0 2117; X32-SSE41-NEXT: pmovsxbq 2(%eax), %xmm1 2118; X32-SSE41-NEXT: pmovsxbq 4(%eax), %xmm2 2119; X32-SSE41-NEXT: pmovsxbq 6(%eax), %xmm3 2120; X32-SSE41-NEXT: retl 2121entry: 2122 %X = load <8 x i8>, <8 x i8>* %ptr 2123 %Y = sext <8 x i8> %X to <8 x i64> 2124 ret <8 x i64> %Y 2125} 2126 2127define <8 x i32> @load_sext_8i1_to_8i32(<8 x i1> *%ptr) { 2128; SSE2-LABEL: load_sext_8i1_to_8i32: 2129; SSE2: # BB#0: # %entry 2130; SSE2-NEXT: movzbl (%rdi), %eax 2131; SSE2-NEXT: movl %eax, %ecx 2132; SSE2-NEXT: shrl $6, %ecx 2133; SSE2-NEXT: andl $1, %ecx 2134; SSE2-NEXT: movd %ecx, %xmm0 2135; SSE2-NEXT: movl %eax, %ecx 2136; SSE2-NEXT: shrl $2, %ecx 2137; SSE2-NEXT: andl $1, %ecx 2138; SSE2-NEXT: movd %ecx, %xmm2 2139; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 2140; SSE2-NEXT: movl %eax, %ecx 2141; SSE2-NEXT: andl $1, %ecx 2142; SSE2-NEXT: movd %ecx, %xmm1 2143; SSE2-NEXT: movl %eax, %ecx 2144; SSE2-NEXT: shrl $4, %ecx 2145; SSE2-NEXT: andl $1, %ecx 2146; SSE2-NEXT: movd %ecx, %xmm0 2147; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2148; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2149; SSE2-NEXT: 
movl %eax, %ecx 2150; SSE2-NEXT: shrl $5, %ecx 2151; SSE2-NEXT: andl $1, %ecx 2152; SSE2-NEXT: movd %ecx, %xmm0 2153; SSE2-NEXT: movl %eax, %ecx 2154; SSE2-NEXT: shrl %ecx 2155; SSE2-NEXT: andl $1, %ecx 2156; SSE2-NEXT: movd %ecx, %xmm2 2157; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 2158; SSE2-NEXT: movl %eax, %ecx 2159; SSE2-NEXT: shrl $3, %ecx 2160; SSE2-NEXT: andl $1, %ecx 2161; SSE2-NEXT: movd %ecx, %xmm0 2162; SSE2-NEXT: shrl $7, %eax 2163; SSE2-NEXT: movzwl %ax, %eax 2164; SSE2-NEXT: movd %eax, %xmm3 2165; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 2166; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 2167; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2168; SSE2-NEXT: movdqa %xmm1, %xmm0 2169; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2170; SSE2-NEXT: pslld $31, %xmm0 2171; SSE2-NEXT: psrad $31, %xmm0 2172; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2173; SSE2-NEXT: pslld $31, %xmm1 2174; SSE2-NEXT: psrad $31, %xmm1 2175; SSE2-NEXT: retq 2176; 2177; SSSE3-LABEL: load_sext_8i1_to_8i32: 2178; SSSE3: # BB#0: # %entry 2179; SSSE3-NEXT: movzbl (%rdi), %eax 2180; SSSE3-NEXT: movl %eax, %ecx 2181; SSSE3-NEXT: shrl $6, %ecx 2182; SSSE3-NEXT: andl $1, %ecx 2183; SSSE3-NEXT: movd %ecx, %xmm0 2184; SSSE3-NEXT: movl %eax, %ecx 2185; SSSE3-NEXT: shrl $2, %ecx 2186; SSSE3-NEXT: andl $1, %ecx 2187; SSSE3-NEXT: movd %ecx, %xmm2 2188; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 2189; SSSE3-NEXT: movl %eax, %ecx 2190; SSSE3-NEXT: andl $1, %ecx 2191; SSSE3-NEXT: movd %ecx, %xmm1 2192; SSSE3-NEXT: movl %eax, %ecx 2193; SSSE3-NEXT: shrl $4, %ecx 2194; SSSE3-NEXT: andl $1, %ecx 2195; SSSE3-NEXT: movd %ecx, %xmm0 2196; 
SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3] 2197; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2198; SSSE3-NEXT: movl %eax, %ecx 2199; SSSE3-NEXT: shrl $5, %ecx 2200; SSSE3-NEXT: andl $1, %ecx 2201; SSSE3-NEXT: movd %ecx, %xmm0 2202; SSSE3-NEXT: movl %eax, %ecx 2203; SSSE3-NEXT: shrl %ecx 2204; SSSE3-NEXT: andl $1, %ecx 2205; SSSE3-NEXT: movd %ecx, %xmm2 2206; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 2207; SSSE3-NEXT: movl %eax, %ecx 2208; SSSE3-NEXT: shrl $3, %ecx 2209; SSSE3-NEXT: andl $1, %ecx 2210; SSSE3-NEXT: movd %ecx, %xmm0 2211; SSSE3-NEXT: shrl $7, %eax 2212; SSSE3-NEXT: movzwl %ax, %eax 2213; SSSE3-NEXT: movd %eax, %xmm3 2214; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3] 2215; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3] 2216; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 2217; SSSE3-NEXT: movdqa %xmm1, %xmm0 2218; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2219; SSSE3-NEXT: pslld $31, %xmm0 2220; SSSE3-NEXT: psrad $31, %xmm0 2221; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2222; SSSE3-NEXT: pslld $31, %xmm1 2223; SSSE3-NEXT: psrad $31, %xmm1 2224; SSSE3-NEXT: retq 2225; 2226; SSE41-LABEL: load_sext_8i1_to_8i32: 2227; SSE41: # BB#0: # %entry 2228; SSE41-NEXT: movzbl (%rdi), %eax 2229; SSE41-NEXT: movl %eax, %ecx 2230; SSE41-NEXT: shrl %ecx 2231; SSE41-NEXT: andl $1, %ecx 2232; SSE41-NEXT: movl %eax, %edx 2233; SSE41-NEXT: andl $1, %edx 2234; SSE41-NEXT: movd %edx, %xmm1 2235; SSE41-NEXT: pinsrw $1, %ecx, %xmm1 2236; SSE41-NEXT: movl %eax, %ecx 2237; SSE41-NEXT: shrl $2, %ecx 2238; SSE41-NEXT: andl $1, %ecx 2239; SSE41-NEXT: pinsrw $2, %ecx, 
%xmm1 2240; SSE41-NEXT: movl %eax, %ecx 2241; SSE41-NEXT: shrl $3, %ecx 2242; SSE41-NEXT: andl $1, %ecx 2243; SSE41-NEXT: pinsrw $3, %ecx, %xmm1 2244; SSE41-NEXT: movl %eax, %ecx 2245; SSE41-NEXT: shrl $4, %ecx 2246; SSE41-NEXT: andl $1, %ecx 2247; SSE41-NEXT: pinsrw $4, %ecx, %xmm1 2248; SSE41-NEXT: movl %eax, %ecx 2249; SSE41-NEXT: shrl $5, %ecx 2250; SSE41-NEXT: andl $1, %ecx 2251; SSE41-NEXT: pinsrw $5, %ecx, %xmm1 2252; SSE41-NEXT: movl %eax, %ecx 2253; SSE41-NEXT: shrl $6, %ecx 2254; SSE41-NEXT: andl $1, %ecx 2255; SSE41-NEXT: pinsrw $6, %ecx, %xmm1 2256; SSE41-NEXT: shrl $7, %eax 2257; SSE41-NEXT: movzwl %ax, %eax 2258; SSE41-NEXT: pinsrw $7, %eax, %xmm1 2259; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 2260; SSE41-NEXT: pslld $31, %xmm0 2261; SSE41-NEXT: psrad $31, %xmm0 2262; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2263; SSE41-NEXT: pslld $31, %xmm1 2264; SSE41-NEXT: psrad $31, %xmm1 2265; SSE41-NEXT: retq 2266; 2267; AVX1-LABEL: load_sext_8i1_to_8i32: 2268; AVX1: # BB#0: # %entry 2269; AVX1-NEXT: movsbq (%rdi), %rax 2270; AVX1-NEXT: movq %rax, %rcx 2271; AVX1-NEXT: shlq $58, %rcx 2272; AVX1-NEXT: sarq $63, %rcx 2273; AVX1-NEXT: movq %rax, %rdx 2274; AVX1-NEXT: shlq $59, %rdx 2275; AVX1-NEXT: sarq $63, %rdx 2276; AVX1-NEXT: vmovd %edx, %xmm0 2277; AVX1-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 2278; AVX1-NEXT: movq %rax, %rcx 2279; AVX1-NEXT: shlq $57, %rcx 2280; AVX1-NEXT: sarq $63, %rcx 2281; AVX1-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 2282; AVX1-NEXT: movq %rax, %rcx 2283; AVX1-NEXT: shrq $7, %rcx 2284; AVX1-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 2285; AVX1-NEXT: movq %rax, %rcx 2286; AVX1-NEXT: shlq $62, %rcx 2287; AVX1-NEXT: sarq $63, %rcx 2288; AVX1-NEXT: movq %rax, %rdx 2289; AVX1-NEXT: shlq $63, %rdx 2290; AVX1-NEXT: sarq $63, %rdx 2291; AVX1-NEXT: vmovd %edx, %xmm1 2292; AVX1-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 2293; AVX1-NEXT: movq %rax, %rcx 2294; 
AVX1-NEXT: shlq $61, %rcx 2295; AVX1-NEXT: sarq $63, %rcx 2296; AVX1-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 2297; AVX1-NEXT: shlq $60, %rax 2298; AVX1-NEXT: sarq $63, %rax 2299; AVX1-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 2300; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2301; AVX1-NEXT: retq 2302; 2303; AVX2-LABEL: load_sext_8i1_to_8i32: 2304; AVX2: # BB#0: # %entry 2305; AVX2-NEXT: movsbq (%rdi), %rax 2306; AVX2-NEXT: movq %rax, %rcx 2307; AVX2-NEXT: shlq $58, %rcx 2308; AVX2-NEXT: sarq $63, %rcx 2309; AVX2-NEXT: movq %rax, %rdx 2310; AVX2-NEXT: shlq $59, %rdx 2311; AVX2-NEXT: sarq $63, %rdx 2312; AVX2-NEXT: vmovd %edx, %xmm0 2313; AVX2-NEXT: vpinsrd $1, %ecx, %xmm0, %xmm0 2314; AVX2-NEXT: movq %rax, %rcx 2315; AVX2-NEXT: shlq $57, %rcx 2316; AVX2-NEXT: sarq $63, %rcx 2317; AVX2-NEXT: vpinsrd $2, %ecx, %xmm0, %xmm0 2318; AVX2-NEXT: movq %rax, %rcx 2319; AVX2-NEXT: shrq $7, %rcx 2320; AVX2-NEXT: vpinsrd $3, %ecx, %xmm0, %xmm0 2321; AVX2-NEXT: movq %rax, %rcx 2322; AVX2-NEXT: shlq $62, %rcx 2323; AVX2-NEXT: sarq $63, %rcx 2324; AVX2-NEXT: movq %rax, %rdx 2325; AVX2-NEXT: shlq $63, %rdx 2326; AVX2-NEXT: sarq $63, %rdx 2327; AVX2-NEXT: vmovd %edx, %xmm1 2328; AVX2-NEXT: vpinsrd $1, %ecx, %xmm1, %xmm1 2329; AVX2-NEXT: movq %rax, %rcx 2330; AVX2-NEXT: shlq $61, %rcx 2331; AVX2-NEXT: sarq $63, %rcx 2332; AVX2-NEXT: vpinsrd $2, %ecx, %xmm1, %xmm1 2333; AVX2-NEXT: shlq $60, %rax 2334; AVX2-NEXT: sarq $63, %rax 2335; AVX2-NEXT: vpinsrd $3, %eax, %xmm1, %xmm1 2336; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 2337; AVX2-NEXT: retq 2338; 2339; AVX512F-LABEL: load_sext_8i1_to_8i32: 2340; AVX512F: # BB#0: # %entry 2341; AVX512F-NEXT: movzbl (%rdi), %eax 2342; AVX512F-NEXT: kmovw %eax, %k1 2343; AVX512F-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2344; AVX512F-NEXT: vpmovqd %zmm0, %ymm0 2345; AVX512F-NEXT: retq 2346; 2347; AVX512BW-LABEL: load_sext_8i1_to_8i32: 2348; AVX512BW: # BB#0: # %entry 2349; AVX512BW-NEXT: movzbl (%rdi), %eax 2350; AVX512BW-NEXT: kmovd %eax, 
%k1 2351; AVX512BW-NEXT: vpternlogq $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 2352; AVX512BW-NEXT: vpmovqd %zmm0, %ymm0 2353; AVX512BW-NEXT: retq 2354; 2355; X32-SSE41-LABEL: load_sext_8i1_to_8i32: 2356; X32-SSE41: # BB#0: # %entry 2357; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2358; X32-SSE41-NEXT: movzbl (%eax), %eax 2359; X32-SSE41-NEXT: movl %eax, %ecx 2360; X32-SSE41-NEXT: shrl %ecx 2361; X32-SSE41-NEXT: andl $1, %ecx 2362; X32-SSE41-NEXT: movl %eax, %edx 2363; X32-SSE41-NEXT: andl $1, %edx 2364; X32-SSE41-NEXT: movd %edx, %xmm1 2365; X32-SSE41-NEXT: pinsrw $1, %ecx, %xmm1 2366; X32-SSE41-NEXT: movl %eax, %ecx 2367; X32-SSE41-NEXT: shrl $2, %ecx 2368; X32-SSE41-NEXT: andl $1, %ecx 2369; X32-SSE41-NEXT: pinsrw $2, %ecx, %xmm1 2370; X32-SSE41-NEXT: movl %eax, %ecx 2371; X32-SSE41-NEXT: shrl $3, %ecx 2372; X32-SSE41-NEXT: andl $1, %ecx 2373; X32-SSE41-NEXT: pinsrw $3, %ecx, %xmm1 2374; X32-SSE41-NEXT: movl %eax, %ecx 2375; X32-SSE41-NEXT: shrl $4, %ecx 2376; X32-SSE41-NEXT: andl $1, %ecx 2377; X32-SSE41-NEXT: pinsrw $4, %ecx, %xmm1 2378; X32-SSE41-NEXT: movl %eax, %ecx 2379; X32-SSE41-NEXT: shrl $5, %ecx 2380; X32-SSE41-NEXT: andl $1, %ecx 2381; X32-SSE41-NEXT: pinsrw $5, %ecx, %xmm1 2382; X32-SSE41-NEXT: movl %eax, %ecx 2383; X32-SSE41-NEXT: shrl $6, %ecx 2384; X32-SSE41-NEXT: andl $1, %ecx 2385; X32-SSE41-NEXT: pinsrw $6, %ecx, %xmm1 2386; X32-SSE41-NEXT: shrl $7, %eax 2387; X32-SSE41-NEXT: pinsrw $7, %eax, %xmm1 2388; X32-SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 2389; X32-SSE41-NEXT: pslld $31, %xmm0 2390; X32-SSE41-NEXT: psrad $31, %xmm0 2391; X32-SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2392; X32-SSE41-NEXT: pslld $31, %xmm1 2393; X32-SSE41-NEXT: psrad $31, %xmm1 2394; X32-SSE41-NEXT: retl 2395entry: 2396 %X = load <8 x i1>, <8 x i1>* %ptr 2397 %Y = sext <8 x i1> %X to <8 x i32> 2398 ret <8 x i32> %Y 2399} 2400 2401define <8 x i32> 
@load_sext_8i8_to_8i32(<8 x i8> *%ptr) { 2402; SSE2-LABEL: load_sext_8i8_to_8i32: 2403; SSE2: # BB#0: # %entry 2404; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2405; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2406; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2407; SSE2-NEXT: psrad $24, %xmm0 2408; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2409; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2410; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 2411; SSE2-NEXT: psrad $24, %xmm1 2412; SSE2-NEXT: retq 2413; 2414; SSSE3-LABEL: load_sext_8i8_to_8i32: 2415; SSSE3: # BB#0: # %entry 2416; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 2417; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2418; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 2419; SSSE3-NEXT: psrad $24, %xmm0 2420; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 2421; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 2422; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 2423; SSSE3-NEXT: psrad $24, %xmm1 2424; SSSE3-NEXT: retq 2425; 2426; SSE41-LABEL: load_sext_8i8_to_8i32: 2427; SSE41: # BB#0: # %entry 2428; SSE41-NEXT: pmovsxbd (%rdi), %xmm0 2429; SSE41-NEXT: pmovsxbd 4(%rdi), %xmm1 2430; SSE41-NEXT: retq 2431; 2432; AVX1-LABEL: load_sext_8i8_to_8i32: 2433; AVX1: # BB#0: # %entry 2434; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0 2435; AVX1-NEXT: vpmovsxwd %xmm0, %xmm1 2436; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 2437; AVX1-NEXT: vpmovsxwd %xmm0, %xmm0 2438; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2439; AVX1-NEXT: retq 2440; 2441; AVX2-LABEL: load_sext_8i8_to_8i32: 2442; AVX2: # BB#0: # %entry 2443; AVX2-NEXT: vpmovsxbd (%rdi), %ymm0 2444; AVX2-NEXT: retq 2445; 2446; AVX512-LABEL: load_sext_8i8_to_8i32: 2447; AVX512: # BB#0: # %entry 2448; AVX512-NEXT: vpmovsxbd (%rdi), %ymm0 2449; AVX512-NEXT: retq 
2450; 2451; X32-SSE41-LABEL: load_sext_8i8_to_8i32: 2452; X32-SSE41: # BB#0: # %entry 2453; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2454; X32-SSE41-NEXT: pmovsxbd (%eax), %xmm0 2455; X32-SSE41-NEXT: pmovsxbd 4(%eax), %xmm1 2456; X32-SSE41-NEXT: retl 2457entry: 2458 %X = load <8 x i8>, <8 x i8>* %ptr 2459 %Y = sext <8 x i8> %X to <8 x i32> 2460 ret <8 x i32> %Y 2461} 2462 2463define <16 x i8> @load_sext_16i1_to_16i8(<16 x i1> *%ptr) nounwind readnone { 2464; SSE2-LABEL: load_sext_16i1_to_16i8: 2465; SSE2: # BB#0: # %entry 2466; SSE2-NEXT: pushq %rbp 2467; SSE2-NEXT: pushq %r15 2468; SSE2-NEXT: pushq %r14 2469; SSE2-NEXT: pushq %r13 2470; SSE2-NEXT: pushq %r12 2471; SSE2-NEXT: pushq %rbx 2472; SSE2-NEXT: movswq (%rdi), %rax 2473; SSE2-NEXT: movq %rax, %r8 2474; SSE2-NEXT: movq %rax, %r9 2475; SSE2-NEXT: movq %rax, %r10 2476; SSE2-NEXT: movq %rax, %r11 2477; SSE2-NEXT: movq %rax, %r14 2478; SSE2-NEXT: movq %rax, %r15 2479; SSE2-NEXT: movq %rax, %r12 2480; SSE2-NEXT: movq %rax, %r13 2481; SSE2-NEXT: movq %rax, %rbx 2482; SSE2-NEXT: movq %rax, %rcx 2483; SSE2-NEXT: movq %rax, %rdx 2484; SSE2-NEXT: movq %rax, %rsi 2485; SSE2-NEXT: movq %rax, %rdi 2486; SSE2-NEXT: movq %rax, %rbp 2487; SSE2-NEXT: shlq $49, %rbp 2488; SSE2-NEXT: sarq $63, %rbp 2489; SSE2-NEXT: movd %ebp, %xmm0 2490; SSE2-NEXT: movq %rax, %rbp 2491; SSE2-NEXT: movsbq %al, %rax 2492; SSE2-NEXT: shlq $57, %r8 2493; SSE2-NEXT: sarq $63, %r8 2494; SSE2-NEXT: movd %r8d, %xmm1 2495; SSE2-NEXT: shlq $53, %r9 2496; SSE2-NEXT: sarq $63, %r9 2497; SSE2-NEXT: movd %r9d, %xmm2 2498; SSE2-NEXT: shlq $61, %r10 2499; SSE2-NEXT: sarq $63, %r10 2500; SSE2-NEXT: movd %r10d, %xmm3 2501; SSE2-NEXT: shlq $51, %r11 2502; SSE2-NEXT: sarq $63, %r11 2503; SSE2-NEXT: movd %r11d, %xmm4 2504; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2505; SSE2-NEXT: shlq $59, %r14 2506; SSE2-NEXT: sarq $63, %r14 2507; 
SSE2-NEXT: movd %r14d, %xmm5 2508; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2509; SSE2-NEXT: shlq $55, %r15 2510; SSE2-NEXT: sarq $63, %r15 2511; SSE2-NEXT: movd %r15d, %xmm2 2512; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 2513; SSE2-NEXT: shlq $63, %r12 2514; SSE2-NEXT: sarq $63, %r12 2515; SSE2-NEXT: movd %r12d, %xmm0 2516; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] 2517; SSE2-NEXT: shlq $50, %r13 2518; SSE2-NEXT: sarq $63, %r13 2519; SSE2-NEXT: movd %r13d, %xmm1 2520; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2521; SSE2-NEXT: shlq $58, %rbx 2522; SSE2-NEXT: sarq $63, %rbx 2523; SSE2-NEXT: movd %ebx, %xmm2 2524; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 2525; SSE2-NEXT: shlq $54, %rcx 2526; SSE2-NEXT: sarq $63, %rcx 2527; SSE2-NEXT: movd %ecx, %xmm4 2528; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2529; SSE2-NEXT: shlq $62, %rdx 2530; SSE2-NEXT: sarq $63, %rdx 2531; SSE2-NEXT: movd %edx, %xmm3 2532; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2533; SSE2-NEXT: shlq $52, %rsi 2534; SSE2-NEXT: sarq $63, %rsi 2535; SSE2-NEXT: movd %esi, %xmm1 2536; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = 
xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 2537; SSE2-NEXT: shlq $60, %rdi 2538; SSE2-NEXT: sarq $63, %rdi 2539; SSE2-NEXT: movd %edi, %xmm4 2540; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2541; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] 2542; SSE2-NEXT: shrq $15, %rbp 2543; SSE2-NEXT: movd %ebp, %xmm1 2544; SSE2-NEXT: shrq $7, %rax 2545; SSE2-NEXT: movd %eax, %xmm2 2546; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2547; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 2548; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 2549; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2550; SSE2-NEXT: popq %rbx 2551; SSE2-NEXT: popq %r12 2552; SSE2-NEXT: popq %r13 2553; SSE2-NEXT: popq %r14 2554; SSE2-NEXT: popq %r15 2555; SSE2-NEXT: popq %rbp 2556; SSE2-NEXT: retq 2557; 2558; SSSE3-LABEL: load_sext_16i1_to_16i8: 2559; SSSE3: # BB#0: # %entry 2560; SSSE3-NEXT: pushq %rbp 2561; SSSE3-NEXT: pushq %r15 2562; SSSE3-NEXT: pushq %r14 2563; SSSE3-NEXT: pushq %r13 2564; SSSE3-NEXT: pushq %r12 2565; SSSE3-NEXT: pushq %rbx 2566; SSSE3-NEXT: movswq (%rdi), %rax 2567; SSSE3-NEXT: movq %rax, %r8 2568; SSSE3-NEXT: movq %rax, %r9 2569; SSSE3-NEXT: movq %rax, %r10 2570; SSSE3-NEXT: movq %rax, %r11 2571; 
SSSE3-NEXT: movq %rax, %r14 2572; SSSE3-NEXT: movq %rax, %r15 2573; SSSE3-NEXT: movq %rax, %r12 2574; SSSE3-NEXT: movq %rax, %r13 2575; SSSE3-NEXT: movq %rax, %rbx 2576; SSSE3-NEXT: movq %rax, %rcx 2577; SSSE3-NEXT: movq %rax, %rdx 2578; SSSE3-NEXT: movq %rax, %rsi 2579; SSSE3-NEXT: movq %rax, %rdi 2580; SSSE3-NEXT: movq %rax, %rbp 2581; SSSE3-NEXT: shlq $49, %rbp 2582; SSSE3-NEXT: sarq $63, %rbp 2583; SSSE3-NEXT: movd %ebp, %xmm0 2584; SSSE3-NEXT: movq %rax, %rbp 2585; SSSE3-NEXT: movsbq %al, %rax 2586; SSSE3-NEXT: shlq $57, %r8 2587; SSSE3-NEXT: sarq $63, %r8 2588; SSSE3-NEXT: movd %r8d, %xmm1 2589; SSSE3-NEXT: shlq $53, %r9 2590; SSSE3-NEXT: sarq $63, %r9 2591; SSSE3-NEXT: movd %r9d, %xmm2 2592; SSSE3-NEXT: shlq $61, %r10 2593; SSSE3-NEXT: sarq $63, %r10 2594; SSSE3-NEXT: movd %r10d, %xmm3 2595; SSSE3-NEXT: shlq $51, %r11 2596; SSSE3-NEXT: sarq $63, %r11 2597; SSSE3-NEXT: movd %r11d, %xmm4 2598; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2599; SSSE3-NEXT: shlq $59, %r14 2600; SSSE3-NEXT: sarq $63, %r14 2601; SSSE3-NEXT: movd %r14d, %xmm5 2602; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2603; SSSE3-NEXT: shlq $55, %r15 2604; SSSE3-NEXT: sarq $63, %r15 2605; SSSE3-NEXT: movd %r15d, %xmm2 2606; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 2607; SSSE3-NEXT: shlq $63, %r12 2608; SSSE3-NEXT: sarq $63, %r12 2609; SSSE3-NEXT: movd %r12d, %xmm0 2610; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3],xmm5[4],xmm4[4],xmm5[5],xmm4[5],xmm5[6],xmm4[6],xmm5[7],xmm4[7] 2611; SSSE3-NEXT: shlq $50, %r13 2612; SSSE3-NEXT: sarq $63, %r13 2613; SSSE3-NEXT: movd %r13d, 
%xmm1 2614; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2615; SSSE3-NEXT: shlq $58, %rbx 2616; SSSE3-NEXT: sarq $63, %rbx 2617; SSSE3-NEXT: movd %ebx, %xmm2 2618; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 2619; SSSE3-NEXT: shlq $54, %rcx 2620; SSSE3-NEXT: sarq $63, %rcx 2621; SSSE3-NEXT: movd %ecx, %xmm4 2622; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2623; SSSE3-NEXT: shlq $62, %rdx 2624; SSSE3-NEXT: sarq $63, %rdx 2625; SSSE3-NEXT: movd %edx, %xmm3 2626; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2627; SSSE3-NEXT: shlq $52, %rsi 2628; SSSE3-NEXT: sarq $63, %rsi 2629; SSSE3-NEXT: movd %esi, %xmm1 2630; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 2631; SSSE3-NEXT: shlq $60, %rdi 2632; SSSE3-NEXT: sarq $63, %rdi 2633; SSSE3-NEXT: movd %edi, %xmm4 2634; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2635; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] 2636; SSSE3-NEXT: shrq $15, %rbp 2637; SSSE3-NEXT: movd %ebp, %xmm1 2638; SSSE3-NEXT: shrq $7, %rax 2639; SSSE3-NEXT: movd %eax, %xmm2 2640; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2641; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 2642; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 2643; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2644; SSSE3-NEXT: popq %rbx 2645; SSSE3-NEXT: popq %r12 2646; SSSE3-NEXT: popq %r13 2647; SSSE3-NEXT: popq %r14 2648; SSSE3-NEXT: popq %r15 2649; SSSE3-NEXT: popq %rbp 2650; SSSE3-NEXT: retq 2651; 2652; SSE41-LABEL: load_sext_16i1_to_16i8: 2653; SSE41: # BB#0: # %entry 2654; SSE41-NEXT: movswq (%rdi), %rax 2655; SSE41-NEXT: movq %rax, %rcx 2656; SSE41-NEXT: shlq $62, %rcx 2657; SSE41-NEXT: sarq $63, %rcx 2658; SSE41-NEXT: movq %rax, %rdx 2659; SSE41-NEXT: shlq $63, %rdx 2660; SSE41-NEXT: sarq $63, %rdx 2661; SSE41-NEXT: movd %edx, %xmm0 2662; SSE41-NEXT: pinsrb $1, %ecx, %xmm0 2663; SSE41-NEXT: movq %rax, %rcx 2664; SSE41-NEXT: shlq $61, %rcx 2665; SSE41-NEXT: sarq $63, %rcx 2666; SSE41-NEXT: pinsrb $2, %ecx, %xmm0 2667; SSE41-NEXT: movq %rax, %rcx 2668; SSE41-NEXT: shlq $60, %rcx 2669; SSE41-NEXT: sarq $63, %rcx 2670; SSE41-NEXT: pinsrb $3, %ecx, %xmm0 2671; SSE41-NEXT: movq %rax, %rcx 2672; SSE41-NEXT: shlq $59, %rcx 2673; SSE41-NEXT: sarq $63, %rcx 2674; SSE41-NEXT: pinsrb $4, %ecx, %xmm0 2675; SSE41-NEXT: movq %rax, %rcx 2676; SSE41-NEXT: shlq $58, %rcx 2677; SSE41-NEXT: sarq $63, %rcx 2678; SSE41-NEXT: pinsrb $5, %ecx, %xmm0 2679; SSE41-NEXT: movq %rax, %rcx 2680; SSE41-NEXT: shlq $57, %rcx 2681; SSE41-NEXT: sarq $63, %rcx 2682; SSE41-NEXT: pinsrb $6, %ecx, %xmm0 2683; SSE41-NEXT: movsbq %al, %rcx 2684; SSE41-NEXT: shrq $7, %rcx 
2685; SSE41-NEXT: pinsrb $7, %ecx, %xmm0 2686; SSE41-NEXT: movq %rax, %rcx 2687; SSE41-NEXT: shlq $55, %rcx 2688; SSE41-NEXT: sarq $63, %rcx 2689; SSE41-NEXT: pinsrb $8, %ecx, %xmm0 2690; SSE41-NEXT: movq %rax, %rcx 2691; SSE41-NEXT: shlq $54, %rcx 2692; SSE41-NEXT: sarq $63, %rcx 2693; SSE41-NEXT: pinsrb $9, %ecx, %xmm0 2694; SSE41-NEXT: movq %rax, %rcx 2695; SSE41-NEXT: shlq $53, %rcx 2696; SSE41-NEXT: sarq $63, %rcx 2697; SSE41-NEXT: pinsrb $10, %ecx, %xmm0 2698; SSE41-NEXT: movq %rax, %rcx 2699; SSE41-NEXT: shlq $52, %rcx 2700; SSE41-NEXT: sarq $63, %rcx 2701; SSE41-NEXT: pinsrb $11, %ecx, %xmm0 2702; SSE41-NEXT: movq %rax, %rcx 2703; SSE41-NEXT: shlq $51, %rcx 2704; SSE41-NEXT: sarq $63, %rcx 2705; SSE41-NEXT: pinsrb $12, %ecx, %xmm0 2706; SSE41-NEXT: movq %rax, %rcx 2707; SSE41-NEXT: shlq $50, %rcx 2708; SSE41-NEXT: sarq $63, %rcx 2709; SSE41-NEXT: pinsrb $13, %ecx, %xmm0 2710; SSE41-NEXT: movq %rax, %rcx 2711; SSE41-NEXT: shlq $49, %rcx 2712; SSE41-NEXT: sarq $63, %rcx 2713; SSE41-NEXT: pinsrb $14, %ecx, %xmm0 2714; SSE41-NEXT: shrq $15, %rax 2715; SSE41-NEXT: pinsrb $15, %eax, %xmm0 2716; SSE41-NEXT: retq 2717; 2718; AVX1-LABEL: load_sext_16i1_to_16i8: 2719; AVX1: # BB#0: # %entry 2720; AVX1-NEXT: movswq (%rdi), %rax 2721; AVX1-NEXT: movq %rax, %rcx 2722; AVX1-NEXT: shlq $62, %rcx 2723; AVX1-NEXT: sarq $63, %rcx 2724; AVX1-NEXT: movq %rax, %rdx 2725; AVX1-NEXT: shlq $63, %rdx 2726; AVX1-NEXT: sarq $63, %rdx 2727; AVX1-NEXT: vmovd %edx, %xmm0 2728; AVX1-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 2729; AVX1-NEXT: movq %rax, %rcx 2730; AVX1-NEXT: shlq $61, %rcx 2731; AVX1-NEXT: sarq $63, %rcx 2732; AVX1-NEXT: vpinsrb $2, %ecx, %xmm0, %xmm0 2733; AVX1-NEXT: movq %rax, %rcx 2734; AVX1-NEXT: shlq $60, %rcx 2735; AVX1-NEXT: sarq $63, %rcx 2736; AVX1-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 2737; AVX1-NEXT: movq %rax, %rcx 2738; AVX1-NEXT: shlq $59, %rcx 2739; AVX1-NEXT: sarq $63, %rcx 2740; AVX1-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 2741; AVX1-NEXT: movq %rax, %rcx 2742; 
AVX1-NEXT: shlq $58, %rcx 2743; AVX1-NEXT: sarq $63, %rcx 2744; AVX1-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 2745; AVX1-NEXT: movq %rax, %rcx 2746; AVX1-NEXT: shlq $57, %rcx 2747; AVX1-NEXT: sarq $63, %rcx 2748; AVX1-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 2749; AVX1-NEXT: movsbq %al, %rcx 2750; AVX1-NEXT: shrq $7, %rcx 2751; AVX1-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0 2752; AVX1-NEXT: movq %rax, %rcx 2753; AVX1-NEXT: shlq $55, %rcx 2754; AVX1-NEXT: sarq $63, %rcx 2755; AVX1-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 2756; AVX1-NEXT: movq %rax, %rcx 2757; AVX1-NEXT: shlq $54, %rcx 2758; AVX1-NEXT: sarq $63, %rcx 2759; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 2760; AVX1-NEXT: movq %rax, %rcx 2761; AVX1-NEXT: shlq $53, %rcx 2762; AVX1-NEXT: sarq $63, %rcx 2763; AVX1-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 2764; AVX1-NEXT: movq %rax, %rcx 2765; AVX1-NEXT: shlq $52, %rcx 2766; AVX1-NEXT: sarq $63, %rcx 2767; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 2768; AVX1-NEXT: movq %rax, %rcx 2769; AVX1-NEXT: shlq $51, %rcx 2770; AVX1-NEXT: sarq $63, %rcx 2771; AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 2772; AVX1-NEXT: movq %rax, %rcx 2773; AVX1-NEXT: shlq $50, %rcx 2774; AVX1-NEXT: sarq $63, %rcx 2775; AVX1-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 2776; AVX1-NEXT: movq %rax, %rcx 2777; AVX1-NEXT: shlq $49, %rcx 2778; AVX1-NEXT: sarq $63, %rcx 2779; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 2780; AVX1-NEXT: shrq $15, %rax 2781; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 2782; AVX1-NEXT: retq 2783; 2784; AVX2-LABEL: load_sext_16i1_to_16i8: 2785; AVX2: # BB#0: # %entry 2786; AVX2-NEXT: movswq (%rdi), %rax 2787; AVX2-NEXT: movq %rax, %rcx 2788; AVX2-NEXT: shlq $62, %rcx 2789; AVX2-NEXT: sarq $63, %rcx 2790; AVX2-NEXT: movq %rax, %rdx 2791; AVX2-NEXT: shlq $63, %rdx 2792; AVX2-NEXT: sarq $63, %rdx 2793; AVX2-NEXT: vmovd %edx, %xmm0 2794; AVX2-NEXT: vpinsrb $1, %ecx, %xmm0, %xmm0 2795; AVX2-NEXT: movq %rax, %rcx 2796; AVX2-NEXT: shlq $61, %rcx 2797; AVX2-NEXT: sarq $63, %rcx 2798; AVX2-NEXT: vpinsrb 
$2, %ecx, %xmm0, %xmm0 2799; AVX2-NEXT: movq %rax, %rcx 2800; AVX2-NEXT: shlq $60, %rcx 2801; AVX2-NEXT: sarq $63, %rcx 2802; AVX2-NEXT: vpinsrb $3, %ecx, %xmm0, %xmm0 2803; AVX2-NEXT: movq %rax, %rcx 2804; AVX2-NEXT: shlq $59, %rcx 2805; AVX2-NEXT: sarq $63, %rcx 2806; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 2807; AVX2-NEXT: movq %rax, %rcx 2808; AVX2-NEXT: shlq $58, %rcx 2809; AVX2-NEXT: sarq $63, %rcx 2810; AVX2-NEXT: vpinsrb $5, %ecx, %xmm0, %xmm0 2811; AVX2-NEXT: movq %rax, %rcx 2812; AVX2-NEXT: shlq $57, %rcx 2813; AVX2-NEXT: sarq $63, %rcx 2814; AVX2-NEXT: vpinsrb $6, %ecx, %xmm0, %xmm0 2815; AVX2-NEXT: movsbq %al, %rcx 2816; AVX2-NEXT: shrq $7, %rcx 2817; AVX2-NEXT: vpinsrb $7, %ecx, %xmm0, %xmm0 2818; AVX2-NEXT: movq %rax, %rcx 2819; AVX2-NEXT: shlq $55, %rcx 2820; AVX2-NEXT: sarq $63, %rcx 2821; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0 2822; AVX2-NEXT: movq %rax, %rcx 2823; AVX2-NEXT: shlq $54, %rcx 2824; AVX2-NEXT: sarq $63, %rcx 2825; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 2826; AVX2-NEXT: movq %rax, %rcx 2827; AVX2-NEXT: shlq $53, %rcx 2828; AVX2-NEXT: sarq $63, %rcx 2829; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 2830; AVX2-NEXT: movq %rax, %rcx 2831; AVX2-NEXT: shlq $52, %rcx 2832; AVX2-NEXT: sarq $63, %rcx 2833; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 2834; AVX2-NEXT: movq %rax, %rcx 2835; AVX2-NEXT: shlq $51, %rcx 2836; AVX2-NEXT: sarq $63, %rcx 2837; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 2838; AVX2-NEXT: movq %rax, %rcx 2839; AVX2-NEXT: shlq $50, %rcx 2840; AVX2-NEXT: sarq $63, %rcx 2841; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 2842; AVX2-NEXT: movq %rax, %rcx 2843; AVX2-NEXT: shlq $49, %rcx 2844; AVX2-NEXT: sarq $63, %rcx 2845; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 2846; AVX2-NEXT: shrq $15, %rax 2847; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 2848; AVX2-NEXT: retq 2849; 2850; AVX512-LABEL: load_sext_16i1_to_16i8: 2851; AVX512: # BB#0: # %entry 2852; AVX512-NEXT: kmovw (%rdi), %k1 2853; AVX512-NEXT: vpternlogd $255, %zmm0, 
%zmm0, %zmm0 {%k1} {z} 2854; AVX512-NEXT: vpmovdb %zmm0, %xmm0 2855; AVX512-NEXT: retq 2856; 2857; X32-SSE41-LABEL: load_sext_16i1_to_16i8: 2858; X32-SSE41: # BB#0: # %entry 2859; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 2860; X32-SSE41-NEXT: movswl (%eax), %eax 2861; X32-SSE41-NEXT: movl %eax, %ecx 2862; X32-SSE41-NEXT: shll $30, %ecx 2863; X32-SSE41-NEXT: sarl $31, %ecx 2864; X32-SSE41-NEXT: movl %eax, %edx 2865; X32-SSE41-NEXT: shll $31, %edx 2866; X32-SSE41-NEXT: sarl $31, %edx 2867; X32-SSE41-NEXT: movd %edx, %xmm0 2868; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm0 2869; X32-SSE41-NEXT: movl %eax, %ecx 2870; X32-SSE41-NEXT: shll $29, %ecx 2871; X32-SSE41-NEXT: sarl $31, %ecx 2872; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm0 2873; X32-SSE41-NEXT: movl %eax, %ecx 2874; X32-SSE41-NEXT: shll $28, %ecx 2875; X32-SSE41-NEXT: sarl $31, %ecx 2876; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm0 2877; X32-SSE41-NEXT: movl %eax, %ecx 2878; X32-SSE41-NEXT: shll $27, %ecx 2879; X32-SSE41-NEXT: sarl $31, %ecx 2880; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm0 2881; X32-SSE41-NEXT: movl %eax, %ecx 2882; X32-SSE41-NEXT: shll $26, %ecx 2883; X32-SSE41-NEXT: sarl $31, %ecx 2884; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm0 2885; X32-SSE41-NEXT: movl %eax, %ecx 2886; X32-SSE41-NEXT: shll $25, %ecx 2887; X32-SSE41-NEXT: sarl $31, %ecx 2888; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm0 2889; X32-SSE41-NEXT: movsbl %al, %ecx 2890; X32-SSE41-NEXT: shrl $7, %ecx 2891; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm0 2892; X32-SSE41-NEXT: movl %eax, %ecx 2893; X32-SSE41-NEXT: shll $23, %ecx 2894; X32-SSE41-NEXT: sarl $31, %ecx 2895; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm0 2896; X32-SSE41-NEXT: movl %eax, %ecx 2897; X32-SSE41-NEXT: shll $22, %ecx 2898; X32-SSE41-NEXT: sarl $31, %ecx 2899; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm0 2900; X32-SSE41-NEXT: movl %eax, %ecx 2901; X32-SSE41-NEXT: shll $21, %ecx 2902; X32-SSE41-NEXT: sarl $31, %ecx 2903; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm0 2904; X32-SSE41-NEXT: movl %eax, %ecx 2905; 
X32-SSE41-NEXT: shll $20, %ecx 2906; X32-SSE41-NEXT: sarl $31, %ecx 2907; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm0 2908; X32-SSE41-NEXT: movl %eax, %ecx 2909; X32-SSE41-NEXT: shll $19, %ecx 2910; X32-SSE41-NEXT: sarl $31, %ecx 2911; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm0 2912; X32-SSE41-NEXT: movl %eax, %ecx 2913; X32-SSE41-NEXT: shll $18, %ecx 2914; X32-SSE41-NEXT: sarl $31, %ecx 2915; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm0 2916; X32-SSE41-NEXT: movl %eax, %ecx 2917; X32-SSE41-NEXT: shll $17, %ecx 2918; X32-SSE41-NEXT: sarl $31, %ecx 2919; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm0 2920; X32-SSE41-NEXT: shrl $15, %eax 2921; X32-SSE41-NEXT: pinsrb $15, %eax, %xmm0 2922; X32-SSE41-NEXT: retl 2923entry: 2924 %X = load <16 x i1>, <16 x i1>* %ptr 2925 %Y = sext <16 x i1> %X to <16 x i8> 2926 ret <16 x i8> %Y 2927} 2928 2929define <16 x i16> @load_sext_16i1_to_16i16(<16 x i1> *%ptr) { 2930; SSE2-LABEL: load_sext_16i1_to_16i16: 2931; SSE2: # BB#0: # %entry 2932; SSE2-NEXT: movzwl (%rdi), %eax 2933; SSE2-NEXT: movl %eax, %ecx 2934; SSE2-NEXT: shrl $14, %ecx 2935; SSE2-NEXT: andl $1, %ecx 2936; SSE2-NEXT: movd %ecx, %xmm0 2937; SSE2-NEXT: movl %eax, %ecx 2938; SSE2-NEXT: shrl $6, %ecx 2939; SSE2-NEXT: andl $1, %ecx 2940; SSE2-NEXT: movd %ecx, %xmm1 2941; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2942; SSE2-NEXT: movl %eax, %ecx 2943; SSE2-NEXT: shrl $10, %ecx 2944; SSE2-NEXT: andl $1, %ecx 2945; SSE2-NEXT: movd %ecx, %xmm0 2946; SSE2-NEXT: movl %eax, %ecx 2947; SSE2-NEXT: shrl $2, %ecx 2948; SSE2-NEXT: andl $1, %ecx 2949; SSE2-NEXT: movd %ecx, %xmm2 2950; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2951; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 2952; SSE2-NEXT: movl %eax, %ecx 2953; SSE2-NEXT: shrl $12, %ecx 2954; SSE2-NEXT: andl $1, %ecx 2955; SSE2-NEXT: movd %ecx, %xmm0 2956; SSE2-NEXT: movl %eax, %ecx 2957; SSE2-NEXT: shrl $4, %ecx 2958; SSE2-NEXT: andl $1, %ecx 2959; SSE2-NEXT: movd %ecx, %xmm3 2960; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 2961; SSE2-NEXT: movl %eax, %ecx 2962; SSE2-NEXT: andl $1, %ecx 2963; SSE2-NEXT: movd %ecx, %xmm1 2964; SSE2-NEXT: movl %eax, %ecx 2965; SSE2-NEXT: shrl $8, %ecx 2966; SSE2-NEXT: andl $1, %ecx 2967; SSE2-NEXT: movd %ecx, %xmm0 2968; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 2969; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 2970; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 2971; SSE2-NEXT: movl %eax, %ecx 2972; SSE2-NEXT: shrl $13, %ecx 2973; SSE2-NEXT: andl $1, %ecx 2974; SSE2-NEXT: movd %ecx, %xmm0 2975; SSE2-NEXT: movl %eax, %ecx 2976; SSE2-NEXT: shrl $5, %ecx 2977; SSE2-NEXT: andl $1, %ecx 2978; SSE2-NEXT: movd %ecx, %xmm2 2979; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 2980; SSE2-NEXT: movl %eax, %ecx 2981; SSE2-NEXT: shrl $9, %ecx 2982; SSE2-NEXT: andl $1, %ecx 2983; SSE2-NEXT: movd %ecx, %xmm3 2984; SSE2-NEXT: movl %eax, %ecx 2985; SSE2-NEXT: shrl %ecx 2986; SSE2-NEXT: andl $1, %ecx 2987; SSE2-NEXT: movd %ecx, %xmm0 2988; SSE2-NEXT: 
punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 2989; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 2990; SSE2-NEXT: movl %eax, %ecx 2991; SSE2-NEXT: shrl $11, %ecx 2992; SSE2-NEXT: andl $1, %ecx 2993; SSE2-NEXT: movd %ecx, %xmm2 2994; SSE2-NEXT: movl %eax, %ecx 2995; SSE2-NEXT: shrl $3, %ecx 2996; SSE2-NEXT: andl $1, %ecx 2997; SSE2-NEXT: movd %ecx, %xmm3 2998; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 2999; SSE2-NEXT: movl %eax, %ecx 3000; SSE2-NEXT: shrl $7, %ecx 3001; SSE2-NEXT: andl $1, %ecx 3002; SSE2-NEXT: movd %ecx, %xmm2 3003; SSE2-NEXT: shrl $15, %eax 3004; SSE2-NEXT: movzwl %ax, %eax 3005; SSE2-NEXT: movd %eax, %xmm4 3006; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 3007; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3008; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 3009; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3010; SSE2-NEXT: movdqa %xmm1, %xmm0 3011; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3012; SSE2-NEXT: psllw $15, %xmm0 3013; SSE2-NEXT: psraw $15, %xmm0 3014; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = 
xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 3015; SSE2-NEXT: psllw $15, %xmm1 3016; SSE2-NEXT: psraw $15, %xmm1 3017; SSE2-NEXT: retq 3018; 3019; SSSE3-LABEL: load_sext_16i1_to_16i16: 3020; SSSE3: # BB#0: # %entry 3021; SSSE3-NEXT: movzwl (%rdi), %eax 3022; SSSE3-NEXT: movl %eax, %ecx 3023; SSSE3-NEXT: shrl $14, %ecx 3024; SSSE3-NEXT: andl $1, %ecx 3025; SSSE3-NEXT: movd %ecx, %xmm0 3026; SSSE3-NEXT: movl %eax, %ecx 3027; SSSE3-NEXT: shrl $6, %ecx 3028; SSSE3-NEXT: andl $1, %ecx 3029; SSSE3-NEXT: movd %ecx, %xmm1 3030; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3031; SSSE3-NEXT: movl %eax, %ecx 3032; SSSE3-NEXT: shrl $10, %ecx 3033; SSSE3-NEXT: andl $1, %ecx 3034; SSSE3-NEXT: movd %ecx, %xmm0 3035; SSSE3-NEXT: movl %eax, %ecx 3036; SSSE3-NEXT: shrl $2, %ecx 3037; SSSE3-NEXT: andl $1, %ecx 3038; SSSE3-NEXT: movd %ecx, %xmm2 3039; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3040; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 3041; SSSE3-NEXT: movl %eax, %ecx 3042; SSSE3-NEXT: shrl $12, %ecx 3043; SSSE3-NEXT: andl $1, %ecx 3044; SSSE3-NEXT: movd %ecx, %xmm0 3045; SSSE3-NEXT: movl %eax, %ecx 3046; SSSE3-NEXT: shrl $4, %ecx 3047; SSSE3-NEXT: andl $1, %ecx 3048; SSSE3-NEXT: movd %ecx, %xmm3 3049; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 3050; SSSE3-NEXT: movl %eax, %ecx 3051; SSSE3-NEXT: andl $1, %ecx 3052; SSSE3-NEXT: movd %ecx, %xmm1 3053; SSSE3-NEXT: movl %eax, %ecx 3054; SSSE3-NEXT: shrl $8, 
%ecx 3055; SSSE3-NEXT: andl $1, %ecx 3056; SSSE3-NEXT: movd %ecx, %xmm0 3057; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3058; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 3059; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 3060; SSSE3-NEXT: movl %eax, %ecx 3061; SSSE3-NEXT: shrl $13, %ecx 3062; SSSE3-NEXT: andl $1, %ecx 3063; SSSE3-NEXT: movd %ecx, %xmm0 3064; SSSE3-NEXT: movl %eax, %ecx 3065; SSSE3-NEXT: shrl $5, %ecx 3066; SSSE3-NEXT: andl $1, %ecx 3067; SSSE3-NEXT: movd %ecx, %xmm2 3068; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 3069; SSSE3-NEXT: movl %eax, %ecx 3070; SSSE3-NEXT: shrl $9, %ecx 3071; SSSE3-NEXT: andl $1, %ecx 3072; SSSE3-NEXT: movd %ecx, %xmm3 3073; SSSE3-NEXT: movl %eax, %ecx 3074; SSSE3-NEXT: shrl %ecx 3075; SSSE3-NEXT: andl $1, %ecx 3076; SSSE3-NEXT: movd %ecx, %xmm0 3077; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 3078; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 3079; SSSE3-NEXT: movl %eax, %ecx 3080; SSSE3-NEXT: shrl $11, %ecx 3081; SSSE3-NEXT: andl $1, %ecx 3082; SSSE3-NEXT: movd %ecx, %xmm2 3083; SSSE3-NEXT: movl %eax, %ecx 3084; SSSE3-NEXT: shrl $3, %ecx 3085; SSSE3-NEXT: andl $1, %ecx 3086; SSSE3-NEXT: movd %ecx, %xmm3 3087; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = 
xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3088; SSSE3-NEXT: movl %eax, %ecx 3089; SSSE3-NEXT: shrl $7, %ecx 3090; SSSE3-NEXT: andl $1, %ecx 3091; SSSE3-NEXT: movd %ecx, %xmm2 3092; SSSE3-NEXT: shrl $15, %eax 3093; SSSE3-NEXT: movzwl %ax, %eax 3094; SSSE3-NEXT: movd %eax, %xmm4 3095; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 3096; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3097; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1],xmm0[2],xmm3[2],xmm0[3],xmm3[3],xmm0[4],xmm3[4],xmm0[5],xmm3[5],xmm0[6],xmm3[6],xmm0[7],xmm3[7] 3098; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 3099; SSSE3-NEXT: movdqa %xmm1, %xmm0 3100; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 3101; SSSE3-NEXT: psllw $15, %xmm0 3102; SSSE3-NEXT: psraw $15, %xmm0 3103; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm0[8],xmm1[9],xmm0[9],xmm1[10],xmm0[10],xmm1[11],xmm0[11],xmm1[12],xmm0[12],xmm1[13],xmm0[13],xmm1[14],xmm0[14],xmm1[15],xmm0[15] 3104; SSSE3-NEXT: psllw $15, %xmm1 3105; SSSE3-NEXT: psraw $15, %xmm1 3106; SSSE3-NEXT: retq 3107; 3108; SSE41-LABEL: load_sext_16i1_to_16i16: 3109; SSE41: # BB#0: # %entry 3110; SSE41-NEXT: movzwl (%rdi), %eax 3111; SSE41-NEXT: movl %eax, %ecx 3112; SSE41-NEXT: shrl %ecx 3113; SSE41-NEXT: andl $1, %ecx 3114; SSE41-NEXT: movl %eax, %edx 3115; SSE41-NEXT: andl $1, %edx 3116; SSE41-NEXT: movd %edx, %xmm1 3117; SSE41-NEXT: pinsrb $1, %ecx, %xmm1 3118; SSE41-NEXT: movl %eax, %ecx 3119; SSE41-NEXT: shrl $2, %ecx 3120; SSE41-NEXT: andl $1, %ecx 3121; SSE41-NEXT: pinsrb $2, 
%ecx, %xmm1 3122; SSE41-NEXT: movl %eax, %ecx 3123; SSE41-NEXT: shrl $3, %ecx 3124; SSE41-NEXT: andl $1, %ecx 3125; SSE41-NEXT: pinsrb $3, %ecx, %xmm1 3126; SSE41-NEXT: movl %eax, %ecx 3127; SSE41-NEXT: shrl $4, %ecx 3128; SSE41-NEXT: andl $1, %ecx 3129; SSE41-NEXT: pinsrb $4, %ecx, %xmm1 3130; SSE41-NEXT: movl %eax, %ecx 3131; SSE41-NEXT: shrl $5, %ecx 3132; SSE41-NEXT: andl $1, %ecx 3133; SSE41-NEXT: pinsrb $5, %ecx, %xmm1 3134; SSE41-NEXT: movl %eax, %ecx 3135; SSE41-NEXT: shrl $6, %ecx 3136; SSE41-NEXT: andl $1, %ecx 3137; SSE41-NEXT: pinsrb $6, %ecx, %xmm1 3138; SSE41-NEXT: movl %eax, %ecx 3139; SSE41-NEXT: shrl $7, %ecx 3140; SSE41-NEXT: andl $1, %ecx 3141; SSE41-NEXT: pinsrb $7, %ecx, %xmm1 3142; SSE41-NEXT: movl %eax, %ecx 3143; SSE41-NEXT: shrl $8, %ecx 3144; SSE41-NEXT: andl $1, %ecx 3145; SSE41-NEXT: pinsrb $8, %ecx, %xmm1 3146; SSE41-NEXT: movl %eax, %ecx 3147; SSE41-NEXT: shrl $9, %ecx 3148; SSE41-NEXT: andl $1, %ecx 3149; SSE41-NEXT: pinsrb $9, %ecx, %xmm1 3150; SSE41-NEXT: movl %eax, %ecx 3151; SSE41-NEXT: shrl $10, %ecx 3152; SSE41-NEXT: andl $1, %ecx 3153; SSE41-NEXT: pinsrb $10, %ecx, %xmm1 3154; SSE41-NEXT: movl %eax, %ecx 3155; SSE41-NEXT: shrl $11, %ecx 3156; SSE41-NEXT: andl $1, %ecx 3157; SSE41-NEXT: pinsrb $11, %ecx, %xmm1 3158; SSE41-NEXT: movl %eax, %ecx 3159; SSE41-NEXT: shrl $12, %ecx 3160; SSE41-NEXT: andl $1, %ecx 3161; SSE41-NEXT: pinsrb $12, %ecx, %xmm1 3162; SSE41-NEXT: movl %eax, %ecx 3163; SSE41-NEXT: shrl $13, %ecx 3164; SSE41-NEXT: andl $1, %ecx 3165; SSE41-NEXT: pinsrb $13, %ecx, %xmm1 3166; SSE41-NEXT: movl %eax, %ecx 3167; SSE41-NEXT: shrl $14, %ecx 3168; SSE41-NEXT: andl $1, %ecx 3169; SSE41-NEXT: pinsrb $14, %ecx, %xmm1 3170; SSE41-NEXT: shrl $15, %eax 3171; SSE41-NEXT: movzwl %ax, %eax 3172; SSE41-NEXT: pinsrb $15, %eax, %xmm1 3173; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 3174; SSE41-NEXT: psllw $15, %xmm0 3175; SSE41-NEXT: 
psraw $15, %xmm0 3176; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 3177; SSE41-NEXT: psllw $15, %xmm1 3178; SSE41-NEXT: psraw $15, %xmm1 3179; SSE41-NEXT: retq 3180; 3181; AVX1-LABEL: load_sext_16i1_to_16i16: 3182; AVX1: # BB#0: # %entry 3183; AVX1-NEXT: pushq %rbp 3184; AVX1-NEXT: .Lcfi0: 3185; AVX1-NEXT: .cfi_def_cfa_offset 16 3186; AVX1-NEXT: pushq %r15 3187; AVX1-NEXT: .Lcfi1: 3188; AVX1-NEXT: .cfi_def_cfa_offset 24 3189; AVX1-NEXT: pushq %r14 3190; AVX1-NEXT: .Lcfi2: 3191; AVX1-NEXT: .cfi_def_cfa_offset 32 3192; AVX1-NEXT: pushq %r13 3193; AVX1-NEXT: .Lcfi3: 3194; AVX1-NEXT: .cfi_def_cfa_offset 40 3195; AVX1-NEXT: pushq %r12 3196; AVX1-NEXT: .Lcfi4: 3197; AVX1-NEXT: .cfi_def_cfa_offset 48 3198; AVX1-NEXT: pushq %rbx 3199; AVX1-NEXT: .Lcfi5: 3200; AVX1-NEXT: .cfi_def_cfa_offset 56 3201; AVX1-NEXT: .Lcfi6: 3202; AVX1-NEXT: .cfi_offset %rbx, -56 3203; AVX1-NEXT: .Lcfi7: 3204; AVX1-NEXT: .cfi_offset %r12, -48 3205; AVX1-NEXT: .Lcfi8: 3206; AVX1-NEXT: .cfi_offset %r13, -40 3207; AVX1-NEXT: .Lcfi9: 3208; AVX1-NEXT: .cfi_offset %r14, -32 3209; AVX1-NEXT: .Lcfi10: 3210; AVX1-NEXT: .cfi_offset %r15, -24 3211; AVX1-NEXT: .Lcfi11: 3212; AVX1-NEXT: .cfi_offset %rbp, -16 3213; AVX1-NEXT: movswq (%rdi), %rax 3214; AVX1-NEXT: movq %rax, %rcx 3215; AVX1-NEXT: shlq $55, %rcx 3216; AVX1-NEXT: sarq $63, %rcx 3217; AVX1-NEXT: vmovd %ecx, %xmm0 3218; AVX1-NEXT: movq %rax, %r8 3219; AVX1-NEXT: movq %rax, %r10 3220; AVX1-NEXT: movq %rax, %r11 3221; AVX1-NEXT: movq %rax, %r14 3222; AVX1-NEXT: movq %rax, %r15 3223; AVX1-NEXT: movq %rax, %r9 3224; AVX1-NEXT: movq %rax, %r12 3225; AVX1-NEXT: movq %rax, %r13 3226; AVX1-NEXT: movq %rax, %rbx 3227; AVX1-NEXT: movq %rax, %rdi 3228; AVX1-NEXT: movq %rax, %rcx 3229; AVX1-NEXT: movq %rax, %rdx 3230; AVX1-NEXT: movq %rax, %rsi 3231; AVX1-NEXT: movsbq %al, %rbp 3232; AVX1-NEXT: shlq $54, %rax 3233; AVX1-NEXT: sarq $63, %rax 3234; AVX1-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 3235; AVX1-NEXT: shlq $53, %r8 
3236; AVX1-NEXT: sarq $63, %r8 3237; AVX1-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0 3238; AVX1-NEXT: shlq $52, %r10 3239; AVX1-NEXT: sarq $63, %r10 3240; AVX1-NEXT: vpinsrw $3, %r10d, %xmm0, %xmm0 3241; AVX1-NEXT: shlq $51, %r11 3242; AVX1-NEXT: sarq $63, %r11 3243; AVX1-NEXT: vpinsrw $4, %r11d, %xmm0, %xmm0 3244; AVX1-NEXT: shlq $50, %r14 3245; AVX1-NEXT: sarq $63, %r14 3246; AVX1-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0 3247; AVX1-NEXT: shlq $49, %r15 3248; AVX1-NEXT: sarq $63, %r15 3249; AVX1-NEXT: vpinsrw $6, %r15d, %xmm0, %xmm0 3250; AVX1-NEXT: shrq $15, %r9 3251; AVX1-NEXT: vpinsrw $7, %r9d, %xmm0, %xmm0 3252; AVX1-NEXT: shlq $63, %r13 3253; AVX1-NEXT: sarq $63, %r13 3254; AVX1-NEXT: vmovd %r13d, %xmm1 3255; AVX1-NEXT: shlq $62, %r12 3256; AVX1-NEXT: sarq $63, %r12 3257; AVX1-NEXT: vpinsrw $1, %r12d, %xmm1, %xmm1 3258; AVX1-NEXT: shlq $61, %rbx 3259; AVX1-NEXT: sarq $63, %rbx 3260; AVX1-NEXT: vpinsrw $2, %ebx, %xmm1, %xmm1 3261; AVX1-NEXT: shlq $60, %rdi 3262; AVX1-NEXT: sarq $63, %rdi 3263; AVX1-NEXT: vpinsrw $3, %edi, %xmm1, %xmm1 3264; AVX1-NEXT: shlq $59, %rcx 3265; AVX1-NEXT: sarq $63, %rcx 3266; AVX1-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1 3267; AVX1-NEXT: shlq $58, %rdx 3268; AVX1-NEXT: sarq $63, %rdx 3269; AVX1-NEXT: vpinsrw $5, %edx, %xmm1, %xmm1 3270; AVX1-NEXT: shlq $57, %rsi 3271; AVX1-NEXT: sarq $63, %rsi 3272; AVX1-NEXT: vpinsrw $6, %esi, %xmm1, %xmm1 3273; AVX1-NEXT: shrq $7, %rbp 3274; AVX1-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1 3275; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 3276; AVX1-NEXT: popq %rbx 3277; AVX1-NEXT: popq %r12 3278; AVX1-NEXT: popq %r13 3279; AVX1-NEXT: popq %r14 3280; AVX1-NEXT: popq %r15 3281; AVX1-NEXT: popq %rbp 3282; AVX1-NEXT: retq 3283; 3284; AVX2-LABEL: load_sext_16i1_to_16i16: 3285; AVX2: # BB#0: # %entry 3286; AVX2-NEXT: pushq %rbp 3287; AVX2-NEXT: .Lcfi0: 3288; AVX2-NEXT: .cfi_def_cfa_offset 16 3289; AVX2-NEXT: pushq %r15 3290; AVX2-NEXT: .Lcfi1: 3291; AVX2-NEXT: .cfi_def_cfa_offset 24 3292; AVX2-NEXT: pushq %r14 3293; 
AVX2-NEXT: .Lcfi2: 3294; AVX2-NEXT: .cfi_def_cfa_offset 32 3295; AVX2-NEXT: pushq %r13 3296; AVX2-NEXT: .Lcfi3: 3297; AVX2-NEXT: .cfi_def_cfa_offset 40 3298; AVX2-NEXT: pushq %r12 3299; AVX2-NEXT: .Lcfi4: 3300; AVX2-NEXT: .cfi_def_cfa_offset 48 3301; AVX2-NEXT: pushq %rbx 3302; AVX2-NEXT: .Lcfi5: 3303; AVX2-NEXT: .cfi_def_cfa_offset 56 3304; AVX2-NEXT: .Lcfi6: 3305; AVX2-NEXT: .cfi_offset %rbx, -56 3306; AVX2-NEXT: .Lcfi7: 3307; AVX2-NEXT: .cfi_offset %r12, -48 3308; AVX2-NEXT: .Lcfi8: 3309; AVX2-NEXT: .cfi_offset %r13, -40 3310; AVX2-NEXT: .Lcfi9: 3311; AVX2-NEXT: .cfi_offset %r14, -32 3312; AVX2-NEXT: .Lcfi10: 3313; AVX2-NEXT: .cfi_offset %r15, -24 3314; AVX2-NEXT: .Lcfi11: 3315; AVX2-NEXT: .cfi_offset %rbp, -16 3316; AVX2-NEXT: movswq (%rdi), %rax 3317; AVX2-NEXT: movq %rax, %rcx 3318; AVX2-NEXT: shlq $55, %rcx 3319; AVX2-NEXT: sarq $63, %rcx 3320; AVX2-NEXT: vmovd %ecx, %xmm0 3321; AVX2-NEXT: movq %rax, %r8 3322; AVX2-NEXT: movq %rax, %r10 3323; AVX2-NEXT: movq %rax, %r11 3324; AVX2-NEXT: movq %rax, %r14 3325; AVX2-NEXT: movq %rax, %r15 3326; AVX2-NEXT: movq %rax, %r9 3327; AVX2-NEXT: movq %rax, %r12 3328; AVX2-NEXT: movq %rax, %r13 3329; AVX2-NEXT: movq %rax, %rbx 3330; AVX2-NEXT: movq %rax, %rdi 3331; AVX2-NEXT: movq %rax, %rcx 3332; AVX2-NEXT: movq %rax, %rdx 3333; AVX2-NEXT: movq %rax, %rsi 3334; AVX2-NEXT: movsbq %al, %rbp 3335; AVX2-NEXT: shlq $54, %rax 3336; AVX2-NEXT: sarq $63, %rax 3337; AVX2-NEXT: vpinsrw $1, %eax, %xmm0, %xmm0 3338; AVX2-NEXT: shlq $53, %r8 3339; AVX2-NEXT: sarq $63, %r8 3340; AVX2-NEXT: vpinsrw $2, %r8d, %xmm0, %xmm0 3341; AVX2-NEXT: shlq $52, %r10 3342; AVX2-NEXT: sarq $63, %r10 3343; AVX2-NEXT: vpinsrw $3, %r10d, %xmm0, %xmm0 3344; AVX2-NEXT: shlq $51, %r11 3345; AVX2-NEXT: sarq $63, %r11 3346; AVX2-NEXT: vpinsrw $4, %r11d, %xmm0, %xmm0 3347; AVX2-NEXT: shlq $50, %r14 3348; AVX2-NEXT: sarq $63, %r14 3349; AVX2-NEXT: vpinsrw $5, %r14d, %xmm0, %xmm0 3350; AVX2-NEXT: shlq $49, %r15 3351; AVX2-NEXT: sarq $63, %r15 3352; AVX2-NEXT: 
vpinsrw $6, %r15d, %xmm0, %xmm0 3353; AVX2-NEXT: shrq $15, %r9 3354; AVX2-NEXT: vpinsrw $7, %r9d, %xmm0, %xmm0 3355; AVX2-NEXT: shlq $63, %r13 3356; AVX2-NEXT: sarq $63, %r13 3357; AVX2-NEXT: vmovd %r13d, %xmm1 3358; AVX2-NEXT: shlq $62, %r12 3359; AVX2-NEXT: sarq $63, %r12 3360; AVX2-NEXT: vpinsrw $1, %r12d, %xmm1, %xmm1 3361; AVX2-NEXT: shlq $61, %rbx 3362; AVX2-NEXT: sarq $63, %rbx 3363; AVX2-NEXT: vpinsrw $2, %ebx, %xmm1, %xmm1 3364; AVX2-NEXT: shlq $60, %rdi 3365; AVX2-NEXT: sarq $63, %rdi 3366; AVX2-NEXT: vpinsrw $3, %edi, %xmm1, %xmm1 3367; AVX2-NEXT: shlq $59, %rcx 3368; AVX2-NEXT: sarq $63, %rcx 3369; AVX2-NEXT: vpinsrw $4, %ecx, %xmm1, %xmm1 3370; AVX2-NEXT: shlq $58, %rdx 3371; AVX2-NEXT: sarq $63, %rdx 3372; AVX2-NEXT: vpinsrw $5, %edx, %xmm1, %xmm1 3373; AVX2-NEXT: shlq $57, %rsi 3374; AVX2-NEXT: sarq $63, %rsi 3375; AVX2-NEXT: vpinsrw $6, %esi, %xmm1, %xmm1 3376; AVX2-NEXT: shrq $7, %rbp 3377; AVX2-NEXT: vpinsrw $7, %ebp, %xmm1, %xmm1 3378; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 3379; AVX2-NEXT: popq %rbx 3380; AVX2-NEXT: popq %r12 3381; AVX2-NEXT: popq %r13 3382; AVX2-NEXT: popq %r14 3383; AVX2-NEXT: popq %r15 3384; AVX2-NEXT: popq %rbp 3385; AVX2-NEXT: retq 3386; 3387; AVX512-LABEL: load_sext_16i1_to_16i16: 3388; AVX512: # BB#0: # %entry 3389; AVX512-NEXT: kmovw (%rdi), %k1 3390; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 3391; AVX512-NEXT: vpmovdw %zmm0, %ymm0 3392; AVX512-NEXT: retq 3393; 3394; X32-SSE41-LABEL: load_sext_16i1_to_16i16: 3395; X32-SSE41: # BB#0: # %entry 3396; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 3397; X32-SSE41-NEXT: movzwl (%eax), %eax 3398; X32-SSE41-NEXT: movl %eax, %ecx 3399; X32-SSE41-NEXT: shrl %ecx 3400; X32-SSE41-NEXT: andl $1, %ecx 3401; X32-SSE41-NEXT: movl %eax, %edx 3402; X32-SSE41-NEXT: andl $1, %edx 3403; X32-SSE41-NEXT: movd %edx, %xmm1 3404; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm1 3405; X32-SSE41-NEXT: movl %eax, %ecx 3406; X32-SSE41-NEXT: shrl $2, %ecx 3407; X32-SSE41-NEXT: andl $1, 
%ecx 3408; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm1 3409; X32-SSE41-NEXT: movl %eax, %ecx 3410; X32-SSE41-NEXT: shrl $3, %ecx 3411; X32-SSE41-NEXT: andl $1, %ecx 3412; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm1 3413; X32-SSE41-NEXT: movl %eax, %ecx 3414; X32-SSE41-NEXT: shrl $4, %ecx 3415; X32-SSE41-NEXT: andl $1, %ecx 3416; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm1 3417; X32-SSE41-NEXT: movl %eax, %ecx 3418; X32-SSE41-NEXT: shrl $5, %ecx 3419; X32-SSE41-NEXT: andl $1, %ecx 3420; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm1 3421; X32-SSE41-NEXT: movl %eax, %ecx 3422; X32-SSE41-NEXT: shrl $6, %ecx 3423; X32-SSE41-NEXT: andl $1, %ecx 3424; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm1 3425; X32-SSE41-NEXT: movl %eax, %ecx 3426; X32-SSE41-NEXT: shrl $7, %ecx 3427; X32-SSE41-NEXT: andl $1, %ecx 3428; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm1 3429; X32-SSE41-NEXT: movl %eax, %ecx 3430; X32-SSE41-NEXT: shrl $8, %ecx 3431; X32-SSE41-NEXT: andl $1, %ecx 3432; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm1 3433; X32-SSE41-NEXT: movl %eax, %ecx 3434; X32-SSE41-NEXT: shrl $9, %ecx 3435; X32-SSE41-NEXT: andl $1, %ecx 3436; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm1 3437; X32-SSE41-NEXT: movl %eax, %ecx 3438; X32-SSE41-NEXT: shrl $10, %ecx 3439; X32-SSE41-NEXT: andl $1, %ecx 3440; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm1 3441; X32-SSE41-NEXT: movl %eax, %ecx 3442; X32-SSE41-NEXT: shrl $11, %ecx 3443; X32-SSE41-NEXT: andl $1, %ecx 3444; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm1 3445; X32-SSE41-NEXT: movl %eax, %ecx 3446; X32-SSE41-NEXT: shrl $12, %ecx 3447; X32-SSE41-NEXT: andl $1, %ecx 3448; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm1 3449; X32-SSE41-NEXT: movl %eax, %ecx 3450; X32-SSE41-NEXT: shrl $13, %ecx 3451; X32-SSE41-NEXT: andl $1, %ecx 3452; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm1 3453; X32-SSE41-NEXT: movl %eax, %ecx 3454; X32-SSE41-NEXT: shrl $14, %ecx 3455; X32-SSE41-NEXT: andl $1, %ecx 3456; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm1 3457; X32-SSE41-NEXT: shrl $15, %eax 3458; X32-SSE41-NEXT: pinsrb $15, %eax, 
%xmm1 3459; X32-SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 3460; X32-SSE41-NEXT: psllw $15, %xmm0 3461; X32-SSE41-NEXT: psraw $15, %xmm0 3462; X32-SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8,8,9,9,10,10,11,11,12,12,13,13,14,14,15,15] 3463; X32-SSE41-NEXT: psllw $15, %xmm1 3464; X32-SSE41-NEXT: psraw $15, %xmm1 3465; X32-SSE41-NEXT: retl 3466entry: 3467 %X = load <16 x i1>, <16 x i1>* %ptr 3468 %Y = sext <16 x i1> %X to <16 x i16> 3469 ret <16 x i16> %Y 3470} 3471 3472define <32 x i8> @load_sext_32i1_to_32i8(<32 x i1> *%ptr) nounwind readnone { 3473; SSE2-LABEL: load_sext_32i1_to_32i8: 3474; SSE2: # BB#0: # %entry 3475; SSE2-NEXT: pushq %rbp 3476; SSE2-NEXT: pushq %r15 3477; SSE2-NEXT: pushq %r14 3478; SSE2-NEXT: pushq %r13 3479; SSE2-NEXT: pushq %r12 3480; SSE2-NEXT: pushq %rbx 3481; SSE2-NEXT: movswq (%rdi), %rbx 3482; SSE2-NEXT: movq %rbx, %r10 3483; SSE2-NEXT: movq %rbx, %r8 3484; SSE2-NEXT: movq %rbx, %r9 3485; SSE2-NEXT: movq %rbx, %r11 3486; SSE2-NEXT: movq %rbx, %r14 3487; SSE2-NEXT: movq %rbx, %r15 3488; SSE2-NEXT: movq %rbx, %r12 3489; SSE2-NEXT: movq %rbx, %r13 3490; SSE2-NEXT: movq %rbx, %rdx 3491; SSE2-NEXT: movq %rbx, %rsi 3492; SSE2-NEXT: movq %rbx, %rcx 3493; SSE2-NEXT: movq %rbx, %rbp 3494; SSE2-NEXT: movq %rbx, %rax 3495; SSE2-NEXT: shlq $49, %rax 3496; SSE2-NEXT: sarq $63, %rax 3497; SSE2-NEXT: movd %eax, %xmm0 3498; SSE2-NEXT: movq %rbx, %rax 3499; SSE2-NEXT: shlq $57, %r10 3500; SSE2-NEXT: sarq $63, %r10 3501; SSE2-NEXT: movd %r10d, %xmm15 3502; SSE2-NEXT: movq %rbx, %r10 3503; SSE2-NEXT: movsbq %bl, %rbx 3504; SSE2-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] 3505; SSE2-NEXT: shlq $53, %r8 3506; SSE2-NEXT: sarq $63, %r8 3507; SSE2-NEXT: movd %r8d, %xmm8 3508; SSE2-NEXT: shlq $61, %r9 3509; SSE2-NEXT: sarq $63, %r9 3510; 
SSE2-NEXT: movd %r9d, %xmm2 3511; SSE2-NEXT: shlq $51, %r11 3512; SSE2-NEXT: sarq $63, %r11 3513; SSE2-NEXT: movd %r11d, %xmm9 3514; SSE2-NEXT: shlq $59, %r14 3515; SSE2-NEXT: sarq $63, %r14 3516; SSE2-NEXT: movd %r14d, %xmm5 3517; SSE2-NEXT: shlq $55, %r15 3518; SSE2-NEXT: sarq $63, %r15 3519; SSE2-NEXT: movd %r15d, %xmm10 3520; SSE2-NEXT: shlq $63, %r12 3521; SSE2-NEXT: sarq $63, %r12 3522; SSE2-NEXT: movd %r12d, %xmm0 3523; SSE2-NEXT: shlq $50, %r13 3524; SSE2-NEXT: sarq $63, %r13 3525; SSE2-NEXT: movd %r13d, %xmm11 3526; SSE2-NEXT: shlq $58, %rdx 3527; SSE2-NEXT: sarq $63, %rdx 3528; SSE2-NEXT: movd %edx, %xmm4 3529; SSE2-NEXT: shlq $54, %rsi 3530; SSE2-NEXT: sarq $63, %rsi 3531; SSE2-NEXT: movd %esi, %xmm12 3532; SSE2-NEXT: shlq $62, %rcx 3533; SSE2-NEXT: sarq $63, %rcx 3534; SSE2-NEXT: movd %ecx, %xmm6 3535; SSE2-NEXT: shlq $52, %rbp 3536; SSE2-NEXT: sarq $63, %rbp 3537; SSE2-NEXT: movd %ebp, %xmm13 3538; SSE2-NEXT: shlq $60, %rax 3539; SSE2-NEXT: sarq $63, %rax 3540; SSE2-NEXT: movd %eax, %xmm7 3541; SSE2-NEXT: shrq $15, %r10 3542; SSE2-NEXT: movd %r10d, %xmm14 3543; SSE2-NEXT: shrq $7, %rbx 3544; SSE2-NEXT: movd %ebx, %xmm3 3545; SSE2-NEXT: movswq 2(%rdi), %rdx 3546; SSE2-NEXT: movq %rdx, %r8 3547; SSE2-NEXT: movq %rdx, %r9 3548; SSE2-NEXT: movq %rdx, %r10 3549; SSE2-NEXT: movq %rdx, %r11 3550; SSE2-NEXT: movq %rdx, %r14 3551; SSE2-NEXT: movq %rdx, %r15 3552; SSE2-NEXT: movq %rdx, %r12 3553; SSE2-NEXT: movq %rdx, %r13 3554; SSE2-NEXT: movq %rdx, %rbx 3555; SSE2-NEXT: movq %rdx, %rax 3556; SSE2-NEXT: movq %rdx, %rcx 3557; SSE2-NEXT: movq %rdx, %rsi 3558; SSE2-NEXT: movq %rdx, %rdi 3559; SSE2-NEXT: movq %rdx, %rbp 3560; SSE2-NEXT: shlq $49, %rbp 3561; SSE2-NEXT: sarq $63, %rbp 3562; SSE2-NEXT: movd %ebp, %xmm1 3563; SSE2-NEXT: movq %rdx, %rbp 3564; SSE2-NEXT: movsbq %dl, %rdx 3565; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7] 3566; 
SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7] 3567; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7] 3568; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7] 3569; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 3570; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 3571; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7] 3572; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7] 3573; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7] 3574; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7] 3575; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7] 3576; SSE2-NEXT: shlq $57, %r8 3577; SSE2-NEXT: sarq $63, %r8 3578; SSE2-NEXT: movd %r8d, %xmm2 3579; SSE2-NEXT: punpcklbw {{.*#+}} xmm7 = 
xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7] 3580; SSE2-NEXT: shlq $53, %r9 3581; SSE2-NEXT: sarq $63, %r9 3582; SSE2-NEXT: movd %r9d, %xmm3 3583; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] 3584; SSE2-NEXT: shlq $61, %r10 3585; SSE2-NEXT: sarq $63, %r10 3586; SSE2-NEXT: movd %r10d, %xmm4 3587; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] 3588; SSE2-NEXT: shlq $51, %r11 3589; SSE2-NEXT: sarq $63, %r11 3590; SSE2-NEXT: movd %r11d, %xmm5 3591; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 3592; SSE2-NEXT: shlq $59, %r14 3593; SSE2-NEXT: sarq $63, %r14 3594; SSE2-NEXT: movd %r14d, %xmm6 3595; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 3596; SSE2-NEXT: shlq $55, %r15 3597; SSE2-NEXT: sarq $63, %r15 3598; SSE2-NEXT: movd %r15d, %xmm3 3599; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 3600; SSE2-NEXT: shlq $63, %r12 3601; SSE2-NEXT: sarq $63, %r12 3602; SSE2-NEXT: movd %r12d, %xmm1 3603; SSE2-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] 3604; SSE2-NEXT: shlq $50, %r13 3605; SSE2-NEXT: sarq $63, %r13 3606; SSE2-NEXT: movd %r13d, %xmm2 3607; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 3608; SSE2-NEXT: shlq $58, %rbx 3609; SSE2-NEXT: sarq $63, %rbx 3610; SSE2-NEXT: movd %ebx, %xmm3 3611; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] 3612; SSE2-NEXT: shlq $54, %rax 3613; SSE2-NEXT: sarq $63, %rax 3614; SSE2-NEXT: movd %eax, %xmm5 3615; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 3616; SSE2-NEXT: shlq $62, %rcx 3617; SSE2-NEXT: sarq $63, %rcx 3618; SSE2-NEXT: movd %ecx, %xmm4 3619; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3620; SSE2-NEXT: shlq $52, %rsi 3621; SSE2-NEXT: sarq $63, %rsi 3622; SSE2-NEXT: movd %esi, %xmm2 3623; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] 3624; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 3625; SSE2-NEXT: shlq $60, %rdi 3626; SSE2-NEXT: sarq $63, %rdi 3627; SSE2-NEXT: movd %edi, %xmm3 3628; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3629; SSE2-NEXT: shrq $15, %rbp 3630; SSE2-NEXT: movd %ebp, %xmm2 3631; SSE2-NEXT: shrq $7, %rdx 3632; SSE2-NEXT: movd %edx, %xmm5 3633; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7] 3634; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = 
xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7] 3635; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 3636; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 3637; SSE2-NEXT: popq %rbx 3638; SSE2-NEXT: popq %r12 3639; SSE2-NEXT: popq %r13 3640; SSE2-NEXT: popq %r14 3641; SSE2-NEXT: popq %r15 3642; SSE2-NEXT: popq %rbp 3643; SSE2-NEXT: retq 3644; 3645; SSSE3-LABEL: load_sext_32i1_to_32i8: 3646; SSSE3: # BB#0: # %entry 3647; SSSE3-NEXT: pushq %rbp 3648; SSSE3-NEXT: pushq %r15 3649; SSSE3-NEXT: pushq %r14 3650; SSSE3-NEXT: pushq %r13 3651; SSSE3-NEXT: pushq %r12 3652; SSSE3-NEXT: pushq %rbx 3653; SSSE3-NEXT: movswq (%rdi), %rbx 3654; SSSE3-NEXT: movq %rbx, %r10 3655; SSSE3-NEXT: movq %rbx, %r8 3656; SSSE3-NEXT: movq %rbx, %r9 3657; SSSE3-NEXT: movq %rbx, %r11 3658; SSSE3-NEXT: movq %rbx, %r14 3659; SSSE3-NEXT: movq %rbx, %r15 3660; SSSE3-NEXT: movq %rbx, %r12 3661; SSSE3-NEXT: movq %rbx, %r13 3662; SSSE3-NEXT: movq %rbx, %rdx 3663; SSSE3-NEXT: movq %rbx, %rsi 3664; SSSE3-NEXT: movq %rbx, %rcx 3665; SSSE3-NEXT: movq %rbx, %rbp 3666; SSSE3-NEXT: movq %rbx, %rax 3667; SSSE3-NEXT: shlq $49, %rax 3668; SSSE3-NEXT: sarq $63, %rax 3669; SSSE3-NEXT: movd %eax, %xmm0 3670; SSSE3-NEXT: movq %rbx, %rax 3671; SSSE3-NEXT: shlq $57, %r10 3672; SSSE3-NEXT: sarq $63, %r10 3673; SSSE3-NEXT: movd %r10d, %xmm15 3674; SSSE3-NEXT: movq %rbx, %r10 3675; SSSE3-NEXT: movsbq %bl, %rbx 3676; SSSE3-NEXT: punpcklbw {{.*#+}} xmm15 = xmm15[0],xmm0[0],xmm15[1],xmm0[1],xmm15[2],xmm0[2],xmm15[3],xmm0[3],xmm15[4],xmm0[4],xmm15[5],xmm0[5],xmm15[6],xmm0[6],xmm15[7],xmm0[7] 3677; SSSE3-NEXT: shlq $53, %r8 3678; SSSE3-NEXT: sarq $63, %r8 3679; SSSE3-NEXT: movd %r8d, %xmm8 3680; SSSE3-NEXT: shlq $61, 
%r9 3681; SSSE3-NEXT: sarq $63, %r9 3682; SSSE3-NEXT: movd %r9d, %xmm2 3683; SSSE3-NEXT: shlq $51, %r11 3684; SSSE3-NEXT: sarq $63, %r11 3685; SSSE3-NEXT: movd %r11d, %xmm9 3686; SSSE3-NEXT: shlq $59, %r14 3687; SSSE3-NEXT: sarq $63, %r14 3688; SSSE3-NEXT: movd %r14d, %xmm5 3689; SSSE3-NEXT: shlq $55, %r15 3690; SSSE3-NEXT: sarq $63, %r15 3691; SSSE3-NEXT: movd %r15d, %xmm10 3692; SSSE3-NEXT: shlq $63, %r12 3693; SSSE3-NEXT: sarq $63, %r12 3694; SSSE3-NEXT: movd %r12d, %xmm0 3695; SSSE3-NEXT: shlq $50, %r13 3696; SSSE3-NEXT: sarq $63, %r13 3697; SSSE3-NEXT: movd %r13d, %xmm11 3698; SSSE3-NEXT: shlq $58, %rdx 3699; SSSE3-NEXT: sarq $63, %rdx 3700; SSSE3-NEXT: movd %edx, %xmm4 3701; SSSE3-NEXT: shlq $54, %rsi 3702; SSSE3-NEXT: sarq $63, %rsi 3703; SSSE3-NEXT: movd %esi, %xmm12 3704; SSSE3-NEXT: shlq $62, %rcx 3705; SSSE3-NEXT: sarq $63, %rcx 3706; SSSE3-NEXT: movd %ecx, %xmm6 3707; SSSE3-NEXT: shlq $52, %rbp 3708; SSSE3-NEXT: sarq $63, %rbp 3709; SSSE3-NEXT: movd %ebp, %xmm13 3710; SSSE3-NEXT: shlq $60, %rax 3711; SSSE3-NEXT: sarq $63, %rax 3712; SSSE3-NEXT: movd %eax, %xmm7 3713; SSSE3-NEXT: shrq $15, %r10 3714; SSSE3-NEXT: movd %r10d, %xmm14 3715; SSSE3-NEXT: shrq $7, %rbx 3716; SSSE3-NEXT: movd %ebx, %xmm3 3717; SSSE3-NEXT: movswq 2(%rdi), %rdx 3718; SSSE3-NEXT: movq %rdx, %r8 3719; SSSE3-NEXT: movq %rdx, %r9 3720; SSSE3-NEXT: movq %rdx, %r10 3721; SSSE3-NEXT: movq %rdx, %r11 3722; SSSE3-NEXT: movq %rdx, %r14 3723; SSSE3-NEXT: movq %rdx, %r15 3724; SSSE3-NEXT: movq %rdx, %r12 3725; SSSE3-NEXT: movq %rdx, %r13 3726; SSSE3-NEXT: movq %rdx, %rbx 3727; SSSE3-NEXT: movq %rdx, %rax 3728; SSSE3-NEXT: movq %rdx, %rcx 3729; SSSE3-NEXT: movq %rdx, %rsi 3730; SSSE3-NEXT: movq %rdx, %rdi 3731; SSSE3-NEXT: movq %rdx, %rbp 3732; SSSE3-NEXT: shlq $49, %rbp 3733; SSSE3-NEXT: sarq $63, %rbp 3734; SSSE3-NEXT: movd %ebp, %xmm1 3735; SSSE3-NEXT: movq %rdx, %rbp 3736; SSSE3-NEXT: movsbq %dl, %rdx 3737; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = 
xmm2[0],xmm8[0],xmm2[1],xmm8[1],xmm2[2],xmm8[2],xmm2[3],xmm8[3],xmm2[4],xmm8[4],xmm2[5],xmm8[5],xmm2[6],xmm8[6],xmm2[7],xmm8[7] 3738; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm15[0],xmm2[1],xmm15[1],xmm2[2],xmm15[2],xmm2[3],xmm15[3],xmm2[4],xmm15[4],xmm2[5],xmm15[5],xmm2[6],xmm15[6],xmm2[7],xmm15[7] 3739; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm9[0],xmm5[1],xmm9[1],xmm5[2],xmm9[2],xmm5[3],xmm9[3],xmm5[4],xmm9[4],xmm5[5],xmm9[5],xmm5[6],xmm9[6],xmm5[7],xmm9[7] 3740; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm10[0],xmm0[1],xmm10[1],xmm0[2],xmm10[2],xmm0[3],xmm10[3],xmm0[4],xmm10[4],xmm0[5],xmm10[5],xmm0[6],xmm10[6],xmm0[7],xmm10[7] 3741; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm5[0],xmm0[1],xmm5[1],xmm0[2],xmm5[2],xmm0[3],xmm5[3],xmm0[4],xmm5[4],xmm0[5],xmm5[5],xmm0[6],xmm5[6],xmm0[7],xmm5[7] 3742; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 3743; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm11[0],xmm4[1],xmm11[1],xmm4[2],xmm11[2],xmm4[3],xmm11[3],xmm4[4],xmm11[4],xmm4[5],xmm11[5],xmm4[6],xmm11[6],xmm4[7],xmm11[7] 3744; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm12[0],xmm6[1],xmm12[1],xmm6[2],xmm12[2],xmm6[3],xmm12[3],xmm6[4],xmm12[4],xmm6[5],xmm12[5],xmm6[6],xmm12[6],xmm6[7],xmm12[7] 3745; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm4[0],xmm6[1],xmm4[1],xmm6[2],xmm4[2],xmm6[3],xmm4[3],xmm6[4],xmm4[4],xmm6[5],xmm4[5],xmm6[6],xmm4[6],xmm6[7],xmm4[7] 3746; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm13[0],xmm7[1],xmm13[1],xmm7[2],xmm13[2],xmm7[3],xmm13[3],xmm7[4],xmm13[4],xmm7[5],xmm13[5],xmm7[6],xmm13[6],xmm7[7],xmm13[7] 3747; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm14[0],xmm3[1],xmm14[1],xmm3[2],xmm14[2],xmm3[3],xmm14[3],xmm3[4],xmm14[4],xmm3[5],xmm14[5],xmm3[6],xmm14[6],xmm3[7],xmm14[7] 3748; SSSE3-NEXT: shlq $57, %r8 3749; SSSE3-NEXT: sarq $63, %r8 3750; SSSE3-NEXT: movd %r8d, %xmm2 
3751; SSSE3-NEXT: punpcklbw {{.*#+}} xmm7 = xmm7[0],xmm3[0],xmm7[1],xmm3[1],xmm7[2],xmm3[2],xmm7[3],xmm3[3],xmm7[4],xmm3[4],xmm7[5],xmm3[5],xmm7[6],xmm3[6],xmm7[7],xmm3[7] 3752; SSSE3-NEXT: shlq $53, %r9 3753; SSSE3-NEXT: sarq $63, %r9 3754; SSSE3-NEXT: movd %r9d, %xmm3 3755; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm7[0],xmm6[1],xmm7[1],xmm6[2],xmm7[2],xmm6[3],xmm7[3],xmm6[4],xmm7[4],xmm6[5],xmm7[5],xmm6[6],xmm7[6],xmm6[7],xmm7[7] 3756; SSSE3-NEXT: shlq $61, %r10 3757; SSSE3-NEXT: sarq $63, %r10 3758; SSSE3-NEXT: movd %r10d, %xmm4 3759; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm6[0],xmm0[1],xmm6[1],xmm0[2],xmm6[2],xmm0[3],xmm6[3],xmm0[4],xmm6[4],xmm0[5],xmm6[5],xmm0[6],xmm6[6],xmm0[7],xmm6[7] 3760; SSSE3-NEXT: shlq $51, %r11 3761; SSSE3-NEXT: sarq $63, %r11 3762; SSSE3-NEXT: movd %r11d, %xmm5 3763; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3],xmm2[4],xmm1[4],xmm2[5],xmm1[5],xmm2[6],xmm1[6],xmm2[7],xmm1[7] 3764; SSSE3-NEXT: shlq $59, %r14 3765; SSSE3-NEXT: sarq $63, %r14 3766; SSSE3-NEXT: movd %r14d, %xmm6 3767; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 3768; SSSE3-NEXT: shlq $55, %r15 3769; SSSE3-NEXT: sarq $63, %r15 3770; SSSE3-NEXT: movd %r15d, %xmm3 3771; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm2[0],xmm4[1],xmm2[1],xmm4[2],xmm2[2],xmm4[3],xmm2[3],xmm4[4],xmm2[4],xmm4[5],xmm2[5],xmm4[6],xmm2[6],xmm4[7],xmm2[7] 3772; SSSE3-NEXT: shlq $63, %r12 3773; SSSE3-NEXT: sarq $63, %r12 3774; SSSE3-NEXT: movd %r12d, %xmm1 3775; SSSE3-NEXT: punpcklbw {{.*#+}} xmm6 = xmm6[0],xmm5[0],xmm6[1],xmm5[1],xmm6[2],xmm5[2],xmm6[3],xmm5[3],xmm6[4],xmm5[4],xmm6[5],xmm5[5],xmm6[6],xmm5[6],xmm6[7],xmm5[7] 3776; SSSE3-NEXT: shlq $50, %r13 3777; SSSE3-NEXT: sarq $63, %r13 3778; SSSE3-NEXT: movd %r13d, %xmm2 3779; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3],xmm1[4],xmm3[4],xmm1[5],xmm3[5],xmm1[6],xmm3[6],xmm1[7],xmm3[7] 3780; SSSE3-NEXT: shlq $58, %rbx 3781; SSSE3-NEXT: sarq $63, %rbx 3782; SSSE3-NEXT: movd %ebx, %xmm3 3783; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] 3784; SSSE3-NEXT: shlq $54, %rax 3785; SSSE3-NEXT: sarq $63, %rax 3786; SSSE3-NEXT: movd %eax, %xmm5 3787; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 3788; SSSE3-NEXT: shlq $62, %rcx 3789; SSSE3-NEXT: sarq $63, %rcx 3790; SSSE3-NEXT: movd %ecx, %xmm4 3791; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3792; SSSE3-NEXT: shlq $52, %rsi 3793; SSSE3-NEXT: sarq $63, %rsi 3794; SSSE3-NEXT: movd %esi, %xmm2 3795; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm5[0],xmm4[1],xmm5[1],xmm4[2],xmm5[2],xmm4[3],xmm5[3],xmm4[4],xmm5[4],xmm4[5],xmm5[5],xmm4[6],xmm5[6],xmm4[7],xmm5[7] 3796; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 3797; SSSE3-NEXT: shlq $60, %rdi 3798; SSSE3-NEXT: sarq $63, %rdi 3799; SSSE3-NEXT: movd %edi, %xmm3 3800; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3],xmm3[4],xmm2[4],xmm3[5],xmm2[5],xmm3[6],xmm2[6],xmm3[7],xmm2[7] 3801; SSSE3-NEXT: shrq $15, %rbp 3802; SSSE3-NEXT: movd %ebp, %xmm2 3803; SSSE3-NEXT: shrq $7, %rdx 3804; SSSE3-NEXT: movd %edx, %xmm5 3805; SSSE3-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm2[0],xmm5[1],xmm2[1],xmm5[2],xmm2[2],xmm5[3],xmm2[3],xmm5[4],xmm2[4],xmm5[5],xmm2[5],xmm5[6],xmm2[6],xmm5[7],xmm2[7] 3806; SSSE3-NEXT: punpcklbw 
{{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3],xmm3[4],xmm5[4],xmm3[5],xmm5[5],xmm3[6],xmm5[6],xmm3[7],xmm5[7] 3807; SSSE3-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3],xmm4[4],xmm3[4],xmm4[5],xmm3[5],xmm4[6],xmm3[6],xmm4[7],xmm3[7] 3808; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 3809; SSSE3-NEXT: popq %rbx 3810; SSSE3-NEXT: popq %r12 3811; SSSE3-NEXT: popq %r13 3812; SSSE3-NEXT: popq %r14 3813; SSSE3-NEXT: popq %r15 3814; SSSE3-NEXT: popq %rbp 3815; SSSE3-NEXT: retq 3816; 3817; SSE41-LABEL: load_sext_32i1_to_32i8: 3818; SSE41: # BB#0: # %entry 3819; SSE41-NEXT: movswq (%rdi), %rax 3820; SSE41-NEXT: movq %rax, %rcx 3821; SSE41-NEXT: shlq $62, %rcx 3822; SSE41-NEXT: sarq $63, %rcx 3823; SSE41-NEXT: movq %rax, %rdx 3824; SSE41-NEXT: shlq $63, %rdx 3825; SSE41-NEXT: sarq $63, %rdx 3826; SSE41-NEXT: movd %edx, %xmm0 3827; SSE41-NEXT: pinsrb $1, %ecx, %xmm0 3828; SSE41-NEXT: movq %rax, %rcx 3829; SSE41-NEXT: shlq $61, %rcx 3830; SSE41-NEXT: sarq $63, %rcx 3831; SSE41-NEXT: pinsrb $2, %ecx, %xmm0 3832; SSE41-NEXT: movq %rax, %rcx 3833; SSE41-NEXT: shlq $60, %rcx 3834; SSE41-NEXT: sarq $63, %rcx 3835; SSE41-NEXT: pinsrb $3, %ecx, %xmm0 3836; SSE41-NEXT: movq %rax, %rcx 3837; SSE41-NEXT: shlq $59, %rcx 3838; SSE41-NEXT: sarq $63, %rcx 3839; SSE41-NEXT: pinsrb $4, %ecx, %xmm0 3840; SSE41-NEXT: movq %rax, %rcx 3841; SSE41-NEXT: shlq $58, %rcx 3842; SSE41-NEXT: sarq $63, %rcx 3843; SSE41-NEXT: pinsrb $5, %ecx, %xmm0 3844; SSE41-NEXT: movq %rax, %rcx 3845; SSE41-NEXT: shlq $57, %rcx 3846; SSE41-NEXT: sarq $63, %rcx 3847; SSE41-NEXT: pinsrb $6, %ecx, %xmm0 3848; SSE41-NEXT: movsbq %al, %rcx 3849; SSE41-NEXT: shrq $7, %rcx 3850; SSE41-NEXT: pinsrb $7, %ecx, %xmm0 3851; SSE41-NEXT: movq %rax, %rcx 3852; SSE41-NEXT: shlq $55, %rcx 3853; SSE41-NEXT: sarq $63, %rcx 3854; 
SSE41-NEXT: pinsrb $8, %ecx, %xmm0 3855; SSE41-NEXT: movq %rax, %rcx 3856; SSE41-NEXT: shlq $54, %rcx 3857; SSE41-NEXT: sarq $63, %rcx 3858; SSE41-NEXT: pinsrb $9, %ecx, %xmm0 3859; SSE41-NEXT: movq %rax, %rcx 3860; SSE41-NEXT: shlq $53, %rcx 3861; SSE41-NEXT: sarq $63, %rcx 3862; SSE41-NEXT: pinsrb $10, %ecx, %xmm0 3863; SSE41-NEXT: movq %rax, %rcx 3864; SSE41-NEXT: shlq $52, %rcx 3865; SSE41-NEXT: sarq $63, %rcx 3866; SSE41-NEXT: pinsrb $11, %ecx, %xmm0 3867; SSE41-NEXT: movq %rax, %rcx 3868; SSE41-NEXT: shlq $51, %rcx 3869; SSE41-NEXT: sarq $63, %rcx 3870; SSE41-NEXT: pinsrb $12, %ecx, %xmm0 3871; SSE41-NEXT: movq %rax, %rcx 3872; SSE41-NEXT: shlq $50, %rcx 3873; SSE41-NEXT: sarq $63, %rcx 3874; SSE41-NEXT: pinsrb $13, %ecx, %xmm0 3875; SSE41-NEXT: movq %rax, %rcx 3876; SSE41-NEXT: shlq $49, %rcx 3877; SSE41-NEXT: sarq $63, %rcx 3878; SSE41-NEXT: pinsrb $14, %ecx, %xmm0 3879; SSE41-NEXT: shrq $15, %rax 3880; SSE41-NEXT: pinsrb $15, %eax, %xmm0 3881; SSE41-NEXT: movswq 2(%rdi), %rax 3882; SSE41-NEXT: movq %rax, %rcx 3883; SSE41-NEXT: shlq $62, %rcx 3884; SSE41-NEXT: sarq $63, %rcx 3885; SSE41-NEXT: movq %rax, %rdx 3886; SSE41-NEXT: shlq $63, %rdx 3887; SSE41-NEXT: sarq $63, %rdx 3888; SSE41-NEXT: movd %edx, %xmm1 3889; SSE41-NEXT: pinsrb $1, %ecx, %xmm1 3890; SSE41-NEXT: movq %rax, %rcx 3891; SSE41-NEXT: shlq $61, %rcx 3892; SSE41-NEXT: sarq $63, %rcx 3893; SSE41-NEXT: pinsrb $2, %ecx, %xmm1 3894; SSE41-NEXT: movq %rax, %rcx 3895; SSE41-NEXT: shlq $60, %rcx 3896; SSE41-NEXT: sarq $63, %rcx 3897; SSE41-NEXT: pinsrb $3, %ecx, %xmm1 3898; SSE41-NEXT: movq %rax, %rcx 3899; SSE41-NEXT: shlq $59, %rcx 3900; SSE41-NEXT: sarq $63, %rcx 3901; SSE41-NEXT: pinsrb $4, %ecx, %xmm1 3902; SSE41-NEXT: movq %rax, %rcx 3903; SSE41-NEXT: shlq $58, %rcx 3904; SSE41-NEXT: sarq $63, %rcx 3905; SSE41-NEXT: pinsrb $5, %ecx, %xmm1 3906; SSE41-NEXT: movq %rax, %rcx 3907; SSE41-NEXT: shlq $57, %rcx 3908; SSE41-NEXT: sarq $63, %rcx 3909; SSE41-NEXT: pinsrb $6, %ecx, %xmm1 3910; SSE41-NEXT: 
movsbq %al, %rcx 3911; SSE41-NEXT: shrq $7, %rcx 3912; SSE41-NEXT: pinsrb $7, %ecx, %xmm1 3913; SSE41-NEXT: movq %rax, %rcx 3914; SSE41-NEXT: shlq $55, %rcx 3915; SSE41-NEXT: sarq $63, %rcx 3916; SSE41-NEXT: pinsrb $8, %ecx, %xmm1 3917; SSE41-NEXT: movq %rax, %rcx 3918; SSE41-NEXT: shlq $54, %rcx 3919; SSE41-NEXT: sarq $63, %rcx 3920; SSE41-NEXT: pinsrb $9, %ecx, %xmm1 3921; SSE41-NEXT: movq %rax, %rcx 3922; SSE41-NEXT: shlq $53, %rcx 3923; SSE41-NEXT: sarq $63, %rcx 3924; SSE41-NEXT: pinsrb $10, %ecx, %xmm1 3925; SSE41-NEXT: movq %rax, %rcx 3926; SSE41-NEXT: shlq $52, %rcx 3927; SSE41-NEXT: sarq $63, %rcx 3928; SSE41-NEXT: pinsrb $11, %ecx, %xmm1 3929; SSE41-NEXT: movq %rax, %rcx 3930; SSE41-NEXT: shlq $51, %rcx 3931; SSE41-NEXT: sarq $63, %rcx 3932; SSE41-NEXT: pinsrb $12, %ecx, %xmm1 3933; SSE41-NEXT: movq %rax, %rcx 3934; SSE41-NEXT: shlq $50, %rcx 3935; SSE41-NEXT: sarq $63, %rcx 3936; SSE41-NEXT: pinsrb $13, %ecx, %xmm1 3937; SSE41-NEXT: movq %rax, %rcx 3938; SSE41-NEXT: shlq $49, %rcx 3939; SSE41-NEXT: sarq $63, %rcx 3940; SSE41-NEXT: pinsrb $14, %ecx, %xmm1 3941; SSE41-NEXT: shrq $15, %rax 3942; SSE41-NEXT: pinsrb $15, %eax, %xmm1 3943; SSE41-NEXT: retq 3944; 3945; AVX1-LABEL: load_sext_32i1_to_32i8: 3946; AVX1: # BB#0: # %entry 3947; AVX1-NEXT: pushq %rbp 3948; AVX1-NEXT: pushq %r15 3949; AVX1-NEXT: pushq %r14 3950; AVX1-NEXT: pushq %r13 3951; AVX1-NEXT: pushq %r12 3952; AVX1-NEXT: pushq %rbx 3953; AVX1-NEXT: movslq (%rdi), %rax 3954; AVX1-NEXT: movq %rax, %rcx 3955; AVX1-NEXT: shlq $47, %rcx 3956; AVX1-NEXT: sarq $63, %rcx 3957; AVX1-NEXT: vmovd %ecx, %xmm0 3958; AVX1-NEXT: movq %rax, %r8 3959; AVX1-NEXT: movq %rax, %rdx 3960; AVX1-NEXT: movq %rax, %rcx 3961; AVX1-NEXT: movq %rax, %rdi 3962; AVX1-NEXT: movq %rax, %r13 3963; AVX1-NEXT: movq %rax, %rsi 3964; AVX1-NEXT: movq %rax, %r10 3965; AVX1-NEXT: movq %rax, %r11 3966; AVX1-NEXT: movq %rax, %r9 3967; AVX1-NEXT: movq %rax, %rbx 3968; AVX1-NEXT: movq %rax, %r14 3969; AVX1-NEXT: movq %rax, %r15 3970; 
AVX1-NEXT: movq %rax, %r12 3971; AVX1-NEXT: movq %rax, %rbp 3972; AVX1-NEXT: shlq $46, %rbp 3973; AVX1-NEXT: sarq $63, %rbp 3974; AVX1-NEXT: vpinsrb $1, %ebp, %xmm0, %xmm0 3975; AVX1-NEXT: movq %rax, %rbp 3976; AVX1-NEXT: shlq $45, %r8 3977; AVX1-NEXT: sarq $63, %r8 3978; AVX1-NEXT: vpinsrb $2, %r8d, %xmm0, %xmm0 3979; AVX1-NEXT: movq %rax, %r8 3980; AVX1-NEXT: shlq $44, %rdx 3981; AVX1-NEXT: sarq $63, %rdx 3982; AVX1-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 3983; AVX1-NEXT: movq %rax, %rdx 3984; AVX1-NEXT: shlq $43, %rcx 3985; AVX1-NEXT: sarq $63, %rcx 3986; AVX1-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 3987; AVX1-NEXT: movq %rax, %rcx 3988; AVX1-NEXT: shlq $42, %rdi 3989; AVX1-NEXT: sarq $63, %rdi 3990; AVX1-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 3991; AVX1-NEXT: movq %rax, %rdi 3992; AVX1-NEXT: shlq $41, %r13 3993; AVX1-NEXT: sarq $63, %r13 3994; AVX1-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0 3995; AVX1-NEXT: movq %rax, %r13 3996; AVX1-NEXT: shlq $40, %rsi 3997; AVX1-NEXT: sarq $63, %rsi 3998; AVX1-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0 3999; AVX1-NEXT: movq %rax, %rsi 4000; AVX1-NEXT: shlq $39, %r10 4001; AVX1-NEXT: sarq $63, %r10 4002; AVX1-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 4003; AVX1-NEXT: movq %rax, %r10 4004; AVX1-NEXT: shlq $38, %r11 4005; AVX1-NEXT: sarq $63, %r11 4006; AVX1-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0 4007; AVX1-NEXT: movsbq %al, %r11 4008; AVX1-NEXT: shlq $37, %r9 4009; AVX1-NEXT: sarq $63, %r9 4010; AVX1-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 4011; AVX1-NEXT: movq %rax, %r9 4012; AVX1-NEXT: shlq $36, %rbx 4013; AVX1-NEXT: sarq $63, %rbx 4014; AVX1-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 4015; AVX1-NEXT: movq %rax, %rbx 4016; AVX1-NEXT: shlq $35, %r14 4017; AVX1-NEXT: sarq $63, %r14 4018; AVX1-NEXT: vpinsrb $12, %r14d, %xmm0, %xmm0 4019; AVX1-NEXT: movq %rax, %r14 4020; AVX1-NEXT: shlq $34, %r15 4021; AVX1-NEXT: sarq $63, %r15 4022; AVX1-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0 4023; AVX1-NEXT: movq %rax, %r15 4024; AVX1-NEXT: shlq $33, %r12 4025; AVX1-NEXT: 
sarq $63, %r12 4026; AVX1-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0 4027; AVX1-NEXT: movq %rax, %r12 4028; AVX1-NEXT: shrq $31, %rbp 4029; AVX1-NEXT: vpinsrb $15, %ebp, %xmm0, %xmm0 4030; AVX1-NEXT: movq %rax, %rbp 4031; AVX1-NEXT: shlq $63, %rdx 4032; AVX1-NEXT: sarq $63, %rdx 4033; AVX1-NEXT: vmovd %edx, %xmm1 4034; AVX1-NEXT: movq %rax, %rdx 4035; AVX1-NEXT: movswq %ax, %rax 4036; AVX1-NEXT: shlq $62, %r8 4037; AVX1-NEXT: sarq $63, %r8 4038; AVX1-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm1 4039; AVX1-NEXT: shlq $61, %rcx 4040; AVX1-NEXT: sarq $63, %rcx 4041; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 4042; AVX1-NEXT: shlq $60, %rdi 4043; AVX1-NEXT: sarq $63, %rdi 4044; AVX1-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1 4045; AVX1-NEXT: shlq $59, %r13 4046; AVX1-NEXT: sarq $63, %r13 4047; AVX1-NEXT: vpinsrb $4, %r13d, %xmm1, %xmm1 4048; AVX1-NEXT: shlq $58, %rsi 4049; AVX1-NEXT: sarq $63, %rsi 4050; AVX1-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1 4051; AVX1-NEXT: shlq $57, %r10 4052; AVX1-NEXT: sarq $63, %r10 4053; AVX1-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1 4054; AVX1-NEXT: shrq $7, %r11 4055; AVX1-NEXT: vpinsrb $7, %r11d, %xmm1, %xmm1 4056; AVX1-NEXT: shlq $55, %r9 4057; AVX1-NEXT: sarq $63, %r9 4058; AVX1-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1 4059; AVX1-NEXT: shlq $54, %rbx 4060; AVX1-NEXT: sarq $63, %rbx 4061; AVX1-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1 4062; AVX1-NEXT: shlq $53, %r14 4063; AVX1-NEXT: sarq $63, %r14 4064; AVX1-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1 4065; AVX1-NEXT: shlq $52, %r15 4066; AVX1-NEXT: sarq $63, %r15 4067; AVX1-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1 4068; AVX1-NEXT: shlq $51, %r12 4069; AVX1-NEXT: sarq $63, %r12 4070; AVX1-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1 4071; AVX1-NEXT: shlq $50, %rbp 4072; AVX1-NEXT: sarq $63, %rbp 4073; AVX1-NEXT: vpinsrb $13, %ebp, %xmm1, %xmm1 4074; AVX1-NEXT: shlq $49, %rdx 4075; AVX1-NEXT: sarq $63, %rdx 4076; AVX1-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1 4077; AVX1-NEXT: shrq $15, %rax 4078; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, 
%xmm1 4079; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4080; AVX1-NEXT: popq %rbx 4081; AVX1-NEXT: popq %r12 4082; AVX1-NEXT: popq %r13 4083; AVX1-NEXT: popq %r14 4084; AVX1-NEXT: popq %r15 4085; AVX1-NEXT: popq %rbp 4086; AVX1-NEXT: retq 4087; 4088; AVX2-LABEL: load_sext_32i1_to_32i8: 4089; AVX2: # BB#0: # %entry 4090; AVX2-NEXT: pushq %rbp 4091; AVX2-NEXT: pushq %r15 4092; AVX2-NEXT: pushq %r14 4093; AVX2-NEXT: pushq %r13 4094; AVX2-NEXT: pushq %r12 4095; AVX2-NEXT: pushq %rbx 4096; AVX2-NEXT: movslq (%rdi), %rax 4097; AVX2-NEXT: movq %rax, %rcx 4098; AVX2-NEXT: shlq $47, %rcx 4099; AVX2-NEXT: sarq $63, %rcx 4100; AVX2-NEXT: vmovd %ecx, %xmm0 4101; AVX2-NEXT: movq %rax, %r8 4102; AVX2-NEXT: movq %rax, %rdx 4103; AVX2-NEXT: movq %rax, %rcx 4104; AVX2-NEXT: movq %rax, %rdi 4105; AVX2-NEXT: movq %rax, %r13 4106; AVX2-NEXT: movq %rax, %rsi 4107; AVX2-NEXT: movq %rax, %r10 4108; AVX2-NEXT: movq %rax, %r11 4109; AVX2-NEXT: movq %rax, %r9 4110; AVX2-NEXT: movq %rax, %rbx 4111; AVX2-NEXT: movq %rax, %r14 4112; AVX2-NEXT: movq %rax, %r15 4113; AVX2-NEXT: movq %rax, %r12 4114; AVX2-NEXT: movq %rax, %rbp 4115; AVX2-NEXT: shlq $46, %rbp 4116; AVX2-NEXT: sarq $63, %rbp 4117; AVX2-NEXT: vpinsrb $1, %ebp, %xmm0, %xmm0 4118; AVX2-NEXT: movq %rax, %rbp 4119; AVX2-NEXT: shlq $45, %r8 4120; AVX2-NEXT: sarq $63, %r8 4121; AVX2-NEXT: vpinsrb $2, %r8d, %xmm0, %xmm0 4122; AVX2-NEXT: movq %rax, %r8 4123; AVX2-NEXT: shlq $44, %rdx 4124; AVX2-NEXT: sarq $63, %rdx 4125; AVX2-NEXT: vpinsrb $3, %edx, %xmm0, %xmm0 4126; AVX2-NEXT: movq %rax, %rdx 4127; AVX2-NEXT: shlq $43, %rcx 4128; AVX2-NEXT: sarq $63, %rcx 4129; AVX2-NEXT: vpinsrb $4, %ecx, %xmm0, %xmm0 4130; AVX2-NEXT: movq %rax, %rcx 4131; AVX2-NEXT: shlq $42, %rdi 4132; AVX2-NEXT: sarq $63, %rdi 4133; AVX2-NEXT: vpinsrb $5, %edi, %xmm0, %xmm0 4134; AVX2-NEXT: movq %rax, %rdi 4135; AVX2-NEXT: shlq $41, %r13 4136; AVX2-NEXT: sarq $63, %r13 4137; AVX2-NEXT: vpinsrb $6, %r13d, %xmm0, %xmm0 4138; AVX2-NEXT: movq %rax, %r13 4139; 
AVX2-NEXT: shlq $40, %rsi 4140; AVX2-NEXT: sarq $63, %rsi 4141; AVX2-NEXT: vpinsrb $7, %esi, %xmm0, %xmm0 4142; AVX2-NEXT: movq %rax, %rsi 4143; AVX2-NEXT: shlq $39, %r10 4144; AVX2-NEXT: sarq $63, %r10 4145; AVX2-NEXT: vpinsrb $8, %r10d, %xmm0, %xmm0 4146; AVX2-NEXT: movq %rax, %r10 4147; AVX2-NEXT: shlq $38, %r11 4148; AVX2-NEXT: sarq $63, %r11 4149; AVX2-NEXT: vpinsrb $9, %r11d, %xmm0, %xmm0 4150; AVX2-NEXT: movsbq %al, %r11 4151; AVX2-NEXT: shlq $37, %r9 4152; AVX2-NEXT: sarq $63, %r9 4153; AVX2-NEXT: vpinsrb $10, %r9d, %xmm0, %xmm0 4154; AVX2-NEXT: movq %rax, %r9 4155; AVX2-NEXT: shlq $36, %rbx 4156; AVX2-NEXT: sarq $63, %rbx 4157; AVX2-NEXT: vpinsrb $11, %ebx, %xmm0, %xmm0 4158; AVX2-NEXT: movq %rax, %rbx 4159; AVX2-NEXT: shlq $35, %r14 4160; AVX2-NEXT: sarq $63, %r14 4161; AVX2-NEXT: vpinsrb $12, %r14d, %xmm0, %xmm0 4162; AVX2-NEXT: movq %rax, %r14 4163; AVX2-NEXT: shlq $34, %r15 4164; AVX2-NEXT: sarq $63, %r15 4165; AVX2-NEXT: vpinsrb $13, %r15d, %xmm0, %xmm0 4166; AVX2-NEXT: movq %rax, %r15 4167; AVX2-NEXT: shlq $33, %r12 4168; AVX2-NEXT: sarq $63, %r12 4169; AVX2-NEXT: vpinsrb $14, %r12d, %xmm0, %xmm0 4170; AVX2-NEXT: movq %rax, %r12 4171; AVX2-NEXT: shrq $31, %rbp 4172; AVX2-NEXT: vpinsrb $15, %ebp, %xmm0, %xmm0 4173; AVX2-NEXT: movq %rax, %rbp 4174; AVX2-NEXT: shlq $63, %rdx 4175; AVX2-NEXT: sarq $63, %rdx 4176; AVX2-NEXT: vmovd %edx, %xmm1 4177; AVX2-NEXT: movq %rax, %rdx 4178; AVX2-NEXT: movswq %ax, %rax 4179; AVX2-NEXT: shlq $62, %r8 4180; AVX2-NEXT: sarq $63, %r8 4181; AVX2-NEXT: vpinsrb $1, %r8d, %xmm1, %xmm1 4182; AVX2-NEXT: shlq $61, %rcx 4183; AVX2-NEXT: sarq $63, %rcx 4184; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1 4185; AVX2-NEXT: shlq $60, %rdi 4186; AVX2-NEXT: sarq $63, %rdi 4187; AVX2-NEXT: vpinsrb $3, %edi, %xmm1, %xmm1 4188; AVX2-NEXT: shlq $59, %r13 4189; AVX2-NEXT: sarq $63, %r13 4190; AVX2-NEXT: vpinsrb $4, %r13d, %xmm1, %xmm1 4191; AVX2-NEXT: shlq $58, %rsi 4192; AVX2-NEXT: sarq $63, %rsi 4193; AVX2-NEXT: vpinsrb $5, %esi, %xmm1, 
%xmm1 4194; AVX2-NEXT: shlq $57, %r10 4195; AVX2-NEXT: sarq $63, %r10 4196; AVX2-NEXT: vpinsrb $6, %r10d, %xmm1, %xmm1 4197; AVX2-NEXT: shrq $7, %r11 4198; AVX2-NEXT: vpinsrb $7, %r11d, %xmm1, %xmm1 4199; AVX2-NEXT: shlq $55, %r9 4200; AVX2-NEXT: sarq $63, %r9 4201; AVX2-NEXT: vpinsrb $8, %r9d, %xmm1, %xmm1 4202; AVX2-NEXT: shlq $54, %rbx 4203; AVX2-NEXT: sarq $63, %rbx 4204; AVX2-NEXT: vpinsrb $9, %ebx, %xmm1, %xmm1 4205; AVX2-NEXT: shlq $53, %r14 4206; AVX2-NEXT: sarq $63, %r14 4207; AVX2-NEXT: vpinsrb $10, %r14d, %xmm1, %xmm1 4208; AVX2-NEXT: shlq $52, %r15 4209; AVX2-NEXT: sarq $63, %r15 4210; AVX2-NEXT: vpinsrb $11, %r15d, %xmm1, %xmm1 4211; AVX2-NEXT: shlq $51, %r12 4212; AVX2-NEXT: sarq $63, %r12 4213; AVX2-NEXT: vpinsrb $12, %r12d, %xmm1, %xmm1 4214; AVX2-NEXT: shlq $50, %rbp 4215; AVX2-NEXT: sarq $63, %rbp 4216; AVX2-NEXT: vpinsrb $13, %ebp, %xmm1, %xmm1 4217; AVX2-NEXT: shlq $49, %rdx 4218; AVX2-NEXT: sarq $63, %rdx 4219; AVX2-NEXT: vpinsrb $14, %edx, %xmm1, %xmm1 4220; AVX2-NEXT: shrq $15, %rax 4221; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 4222; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 4223; AVX2-NEXT: popq %rbx 4224; AVX2-NEXT: popq %r12 4225; AVX2-NEXT: popq %r13 4226; AVX2-NEXT: popq %r14 4227; AVX2-NEXT: popq %r15 4228; AVX2-NEXT: popq %rbp 4229; AVX2-NEXT: retq 4230; 4231; AVX512-LABEL: load_sext_32i1_to_32i8: 4232; AVX512: # BB#0: # %entry 4233; AVX512-NEXT: kmovw (%rdi), %k1 4234; AVX512-NEXT: kmovw 2(%rdi), %k2 4235; AVX512-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0 {%k1} {z} 4236; AVX512-NEXT: vpmovdb %zmm0, %xmm0 4237; AVX512-NEXT: vpternlogd $255, %zmm1, %zmm1, %zmm1 {%k2} {z} 4238; AVX512-NEXT: vpmovdb %zmm1, %xmm1 4239; AVX512-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0 4240; AVX512-NEXT: retq 4241; 4242; X32-SSE41-LABEL: load_sext_32i1_to_32i8: 4243; X32-SSE41: # BB#0: # %entry 4244; X32-SSE41-NEXT: pushl %esi 4245; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4246; X32-SSE41-NEXT: movswl (%eax), %ecx 4247; X32-SSE41-NEXT: movl %ecx, 
%edx 4248; X32-SSE41-NEXT: shll $30, %edx 4249; X32-SSE41-NEXT: sarl $31, %edx 4250; X32-SSE41-NEXT: movl %ecx, %esi 4251; X32-SSE41-NEXT: shll $31, %esi 4252; X32-SSE41-NEXT: sarl $31, %esi 4253; X32-SSE41-NEXT: movd %esi, %xmm0 4254; X32-SSE41-NEXT: pinsrb $1, %edx, %xmm0 4255; X32-SSE41-NEXT: movl %ecx, %edx 4256; X32-SSE41-NEXT: shll $29, %edx 4257; X32-SSE41-NEXT: sarl $31, %edx 4258; X32-SSE41-NEXT: pinsrb $2, %edx, %xmm0 4259; X32-SSE41-NEXT: movl %ecx, %edx 4260; X32-SSE41-NEXT: shll $28, %edx 4261; X32-SSE41-NEXT: sarl $31, %edx 4262; X32-SSE41-NEXT: pinsrb $3, %edx, %xmm0 4263; X32-SSE41-NEXT: movl %ecx, %edx 4264; X32-SSE41-NEXT: shll $27, %edx 4265; X32-SSE41-NEXT: sarl $31, %edx 4266; X32-SSE41-NEXT: pinsrb $4, %edx, %xmm0 4267; X32-SSE41-NEXT: movl %ecx, %edx 4268; X32-SSE41-NEXT: shll $26, %edx 4269; X32-SSE41-NEXT: sarl $31, %edx 4270; X32-SSE41-NEXT: pinsrb $5, %edx, %xmm0 4271; X32-SSE41-NEXT: movl %ecx, %edx 4272; X32-SSE41-NEXT: shll $25, %edx 4273; X32-SSE41-NEXT: sarl $31, %edx 4274; X32-SSE41-NEXT: pinsrb $6, %edx, %xmm0 4275; X32-SSE41-NEXT: movsbl %cl, %edx 4276; X32-SSE41-NEXT: shrl $7, %edx 4277; X32-SSE41-NEXT: pinsrb $7, %edx, %xmm0 4278; X32-SSE41-NEXT: movl %ecx, %edx 4279; X32-SSE41-NEXT: shll $23, %edx 4280; X32-SSE41-NEXT: sarl $31, %edx 4281; X32-SSE41-NEXT: pinsrb $8, %edx, %xmm0 4282; X32-SSE41-NEXT: movl %ecx, %edx 4283; X32-SSE41-NEXT: shll $22, %edx 4284; X32-SSE41-NEXT: sarl $31, %edx 4285; X32-SSE41-NEXT: pinsrb $9, %edx, %xmm0 4286; X32-SSE41-NEXT: movl %ecx, %edx 4287; X32-SSE41-NEXT: shll $21, %edx 4288; X32-SSE41-NEXT: sarl $31, %edx 4289; X32-SSE41-NEXT: pinsrb $10, %edx, %xmm0 4290; X32-SSE41-NEXT: movl %ecx, %edx 4291; X32-SSE41-NEXT: shll $20, %edx 4292; X32-SSE41-NEXT: sarl $31, %edx 4293; X32-SSE41-NEXT: pinsrb $11, %edx, %xmm0 4294; X32-SSE41-NEXT: movl %ecx, %edx 4295; X32-SSE41-NEXT: shll $19, %edx 4296; X32-SSE41-NEXT: sarl $31, %edx 4297; X32-SSE41-NEXT: pinsrb $12, %edx, %xmm0 4298; X32-SSE41-NEXT: movl 
%ecx, %edx 4299; X32-SSE41-NEXT: shll $18, %edx 4300; X32-SSE41-NEXT: sarl $31, %edx 4301; X32-SSE41-NEXT: pinsrb $13, %edx, %xmm0 4302; X32-SSE41-NEXT: movl %ecx, %edx 4303; X32-SSE41-NEXT: shll $17, %edx 4304; X32-SSE41-NEXT: sarl $31, %edx 4305; X32-SSE41-NEXT: pinsrb $14, %edx, %xmm0 4306; X32-SSE41-NEXT: shrl $15, %ecx 4307; X32-SSE41-NEXT: pinsrb $15, %ecx, %xmm0 4308; X32-SSE41-NEXT: movswl 2(%eax), %eax 4309; X32-SSE41-NEXT: movl %eax, %ecx 4310; X32-SSE41-NEXT: shll $30, %ecx 4311; X32-SSE41-NEXT: sarl $31, %ecx 4312; X32-SSE41-NEXT: movl %eax, %edx 4313; X32-SSE41-NEXT: shll $31, %edx 4314; X32-SSE41-NEXT: sarl $31, %edx 4315; X32-SSE41-NEXT: movd %edx, %xmm1 4316; X32-SSE41-NEXT: pinsrb $1, %ecx, %xmm1 4317; X32-SSE41-NEXT: movl %eax, %ecx 4318; X32-SSE41-NEXT: shll $29, %ecx 4319; X32-SSE41-NEXT: sarl $31, %ecx 4320; X32-SSE41-NEXT: pinsrb $2, %ecx, %xmm1 4321; X32-SSE41-NEXT: movl %eax, %ecx 4322; X32-SSE41-NEXT: shll $28, %ecx 4323; X32-SSE41-NEXT: sarl $31, %ecx 4324; X32-SSE41-NEXT: pinsrb $3, %ecx, %xmm1 4325; X32-SSE41-NEXT: movl %eax, %ecx 4326; X32-SSE41-NEXT: shll $27, %ecx 4327; X32-SSE41-NEXT: sarl $31, %ecx 4328; X32-SSE41-NEXT: pinsrb $4, %ecx, %xmm1 4329; X32-SSE41-NEXT: movl %eax, %ecx 4330; X32-SSE41-NEXT: shll $26, %ecx 4331; X32-SSE41-NEXT: sarl $31, %ecx 4332; X32-SSE41-NEXT: pinsrb $5, %ecx, %xmm1 4333; X32-SSE41-NEXT: movl %eax, %ecx 4334; X32-SSE41-NEXT: shll $25, %ecx 4335; X32-SSE41-NEXT: sarl $31, %ecx 4336; X32-SSE41-NEXT: pinsrb $6, %ecx, %xmm1 4337; X32-SSE41-NEXT: movsbl %al, %ecx 4338; X32-SSE41-NEXT: shrl $7, %ecx 4339; X32-SSE41-NEXT: pinsrb $7, %ecx, %xmm1 4340; X32-SSE41-NEXT: movl %eax, %ecx 4341; X32-SSE41-NEXT: shll $23, %ecx 4342; X32-SSE41-NEXT: sarl $31, %ecx 4343; X32-SSE41-NEXT: pinsrb $8, %ecx, %xmm1 4344; X32-SSE41-NEXT: movl %eax, %ecx 4345; X32-SSE41-NEXT: shll $22, %ecx 4346; X32-SSE41-NEXT: sarl $31, %ecx 4347; X32-SSE41-NEXT: pinsrb $9, %ecx, %xmm1 4348; X32-SSE41-NEXT: movl %eax, %ecx 4349; 
X32-SSE41-NEXT: shll $21, %ecx 4350; X32-SSE41-NEXT: sarl $31, %ecx 4351; X32-SSE41-NEXT: pinsrb $10, %ecx, %xmm1 4352; X32-SSE41-NEXT: movl %eax, %ecx 4353; X32-SSE41-NEXT: shll $20, %ecx 4354; X32-SSE41-NEXT: sarl $31, %ecx 4355; X32-SSE41-NEXT: pinsrb $11, %ecx, %xmm1 4356; X32-SSE41-NEXT: movl %eax, %ecx 4357; X32-SSE41-NEXT: shll $19, %ecx 4358; X32-SSE41-NEXT: sarl $31, %ecx 4359; X32-SSE41-NEXT: pinsrb $12, %ecx, %xmm1 4360; X32-SSE41-NEXT: movl %eax, %ecx 4361; X32-SSE41-NEXT: shll $18, %ecx 4362; X32-SSE41-NEXT: sarl $31, %ecx 4363; X32-SSE41-NEXT: pinsrb $13, %ecx, %xmm1 4364; X32-SSE41-NEXT: movl %eax, %ecx 4365; X32-SSE41-NEXT: shll $17, %ecx 4366; X32-SSE41-NEXT: sarl $31, %ecx 4367; X32-SSE41-NEXT: pinsrb $14, %ecx, %xmm1 4368; X32-SSE41-NEXT: shrl $15, %eax 4369; X32-SSE41-NEXT: pinsrb $15, %eax, %xmm1 4370; X32-SSE41-NEXT: popl %esi 4371; X32-SSE41-NEXT: retl 4372entry: 4373 %X = load <32 x i1>, <32 x i1>* %ptr 4374 %Y = sext <32 x i1> %X to <32 x i8> 4375 ret <32 x i8> %Y 4376} 4377 4378define <16 x i16> @load_sext_16i8_to_16i16(<16 x i8> *%ptr) { 4379; SSE2-LABEL: load_sext_16i8_to_16i16: 4380; SSE2: # BB#0: # %entry 4381; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4382; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4383; SSE2-NEXT: psraw $8, %xmm0 4384; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 4385; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4386; SSE2-NEXT: psraw $8, %xmm1 4387; SSE2-NEXT: retq 4388; 4389; SSSE3-LABEL: load_sext_16i8_to_16i16: 4390; SSSE3: # BB#0: # %entry 4391; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4392; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4393; SSSE3-NEXT: psraw $8, %xmm0 4394; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 4395; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4396; SSSE3-NEXT: psraw $8, %xmm1 4397; SSSE3-NEXT: retq 4398; 4399; SSE41-LABEL: load_sext_16i8_to_16i16: 4400; 
SSE41: # BB#0: # %entry 4401; SSE41-NEXT: pmovsxbw (%rdi), %xmm0 4402; SSE41-NEXT: pmovsxbw 8(%rdi), %xmm1 4403; SSE41-NEXT: retq 4404; 4405; AVX1-LABEL: load_sext_16i8_to_16i16: 4406; AVX1: # BB#0: # %entry 4407; AVX1-NEXT: vpmovsxbw (%rdi), %xmm0 4408; AVX1-NEXT: vpmovsxbw 8(%rdi), %xmm1 4409; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4410; AVX1-NEXT: retq 4411; 4412; AVX2-LABEL: load_sext_16i8_to_16i16: 4413; AVX2: # BB#0: # %entry 4414; AVX2-NEXT: vpmovsxbw (%rdi), %ymm0 4415; AVX2-NEXT: retq 4416; 4417; AVX512-LABEL: load_sext_16i8_to_16i16: 4418; AVX512: # BB#0: # %entry 4419; AVX512-NEXT: vpmovsxbw (%rdi), %ymm0 4420; AVX512-NEXT: retq 4421; 4422; X32-SSE41-LABEL: load_sext_16i8_to_16i16: 4423; X32-SSE41: # BB#0: # %entry 4424; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4425; X32-SSE41-NEXT: pmovsxbw (%eax), %xmm0 4426; X32-SSE41-NEXT: pmovsxbw 8(%eax), %xmm1 4427; X32-SSE41-NEXT: retl 4428entry: 4429 %X = load <16 x i8>, <16 x i8>* %ptr 4430 %Y = sext <16 x i8> %X to <16 x i16> 4431 ret <16 x i16> %Y 4432} 4433 4434define <2 x i64> @load_sext_2i16_to_2i64(<2 x i16> *%ptr) { 4435; SSE2-LABEL: load_sext_2i16_to_2i64: 4436; SSE2: # BB#0: # %entry 4437; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 4438; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 4439; SSE2-NEXT: movdqa %xmm0, %xmm1 4440; SSE2-NEXT: psrad $31, %xmm1 4441; SSE2-NEXT: psrad $16, %xmm0 4442; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4443; SSE2-NEXT: retq 4444; 4445; SSSE3-LABEL: load_sext_2i16_to_2i64: 4446; SSSE3: # BB#0: # %entry 4447; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 4448; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 4449; SSSE3-NEXT: movdqa %xmm0, %xmm1 4450; SSSE3-NEXT: psrad $31, %xmm1 4451; SSSE3-NEXT: psrad $16, %xmm0 4452; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4453; SSSE3-NEXT: retq 4454; 4455; SSE41-LABEL: load_sext_2i16_to_2i64: 4456; SSE41: # BB#0: # %entry 
4457; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 4458; SSE41-NEXT: retq 4459; 4460; AVX-LABEL: load_sext_2i16_to_2i64: 4461; AVX: # BB#0: # %entry 4462; AVX-NEXT: vpmovsxwq (%rdi), %xmm0 4463; AVX-NEXT: retq 4464; 4465; X32-SSE41-LABEL: load_sext_2i16_to_2i64: 4466; X32-SSE41: # BB#0: # %entry 4467; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4468; X32-SSE41-NEXT: pmovsxwq (%eax), %xmm0 4469; X32-SSE41-NEXT: retl 4470entry: 4471 %X = load <2 x i16>, <2 x i16>* %ptr 4472 %Y = sext <2 x i16> %X to <2 x i64> 4473 ret <2 x i64> %Y 4474} 4475 4476define <4 x i32> @load_sext_4i16_to_4i32(<4 x i16> *%ptr) { 4477; SSE2-LABEL: load_sext_4i16_to_4i32: 4478; SSE2: # BB#0: # %entry 4479; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4480; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 4481; SSE2-NEXT: psrad $16, %xmm0 4482; SSE2-NEXT: retq 4483; 4484; SSSE3-LABEL: load_sext_4i16_to_4i32: 4485; SSSE3: # BB#0: # %entry 4486; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4487; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 4488; SSSE3-NEXT: psrad $16, %xmm0 4489; SSSE3-NEXT: retq 4490; 4491; SSE41-LABEL: load_sext_4i16_to_4i32: 4492; SSE41: # BB#0: # %entry 4493; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 4494; SSE41-NEXT: retq 4495; 4496; AVX-LABEL: load_sext_4i16_to_4i32: 4497; AVX: # BB#0: # %entry 4498; AVX-NEXT: vpmovsxwd (%rdi), %xmm0 4499; AVX-NEXT: retq 4500; 4501; X32-SSE41-LABEL: load_sext_4i16_to_4i32: 4502; X32-SSE41: # BB#0: # %entry 4503; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4504; X32-SSE41-NEXT: pmovsxwd (%eax), %xmm0 4505; X32-SSE41-NEXT: retl 4506entry: 4507 %X = load <4 x i16>, <4 x i16>* %ptr 4508 %Y = sext <4 x i16> %X to <4 x i32> 4509 ret <4 x i32> %Y 4510} 4511 4512define <4 x i64> @load_sext_4i16_to_4i64(<4 x i16> *%ptr) { 4513; SSE2-LABEL: load_sext_4i16_to_4i64: 4514; SSE2: # BB#0: # %entry 4515; SSE2-NEXT: movswq 2(%rdi), %rax 4516; SSE2-NEXT: movd %rax, %xmm1 4517; SSE2-NEXT: movswq (%rdi), %rax 4518; SSE2-NEXT: movd %rax, %xmm0 4519; 
SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4520; SSE2-NEXT: movswq 6(%rdi), %rax 4521; SSE2-NEXT: movd %rax, %xmm2 4522; SSE2-NEXT: movswq 4(%rdi), %rax 4523; SSE2-NEXT: movd %rax, %xmm1 4524; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 4525; SSE2-NEXT: retq 4526; 4527; SSSE3-LABEL: load_sext_4i16_to_4i64: 4528; SSSE3: # BB#0: # %entry 4529; SSSE3-NEXT: movswq 2(%rdi), %rax 4530; SSSE3-NEXT: movd %rax, %xmm1 4531; SSSE3-NEXT: movswq (%rdi), %rax 4532; SSSE3-NEXT: movd %rax, %xmm0 4533; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 4534; SSSE3-NEXT: movswq 6(%rdi), %rax 4535; SSSE3-NEXT: movd %rax, %xmm2 4536; SSSE3-NEXT: movswq 4(%rdi), %rax 4537; SSSE3-NEXT: movd %rax, %xmm1 4538; SSSE3-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm2[0] 4539; SSSE3-NEXT: retq 4540; 4541; SSE41-LABEL: load_sext_4i16_to_4i64: 4542; SSE41: # BB#0: # %entry 4543; SSE41-NEXT: pmovsxwq (%rdi), %xmm0 4544; SSE41-NEXT: pmovsxwq 4(%rdi), %xmm1 4545; SSE41-NEXT: retq 4546; 4547; AVX1-LABEL: load_sext_4i16_to_4i64: 4548; AVX1: # BB#0: # %entry 4549; AVX1-NEXT: vpmovsxwd (%rdi), %xmm0 4550; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1 4551; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 4552; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0 4553; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 4554; AVX1-NEXT: retq 4555; 4556; AVX2-LABEL: load_sext_4i16_to_4i64: 4557; AVX2: # BB#0: # %entry 4558; AVX2-NEXT: vpmovsxwq (%rdi), %ymm0 4559; AVX2-NEXT: retq 4560; 4561; AVX512-LABEL: load_sext_4i16_to_4i64: 4562; AVX512: # BB#0: # %entry 4563; AVX512-NEXT: vpmovsxwq (%rdi), %ymm0 4564; AVX512-NEXT: retq 4565; 4566; X32-SSE41-LABEL: load_sext_4i16_to_4i64: 4567; X32-SSE41: # BB#0: # %entry 4568; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4569; X32-SSE41-NEXT: pmovsxwq (%eax), %xmm0 4570; X32-SSE41-NEXT: pmovsxwq 4(%eax), %xmm1 4571; X32-SSE41-NEXT: retl 4572entry: 4573 %X = load <4 x i16>, <4 x i16>* %ptr 4574 %Y = sext <4 x i16> %X to <4 x i64> 4575 ret <4 x i64> %Y 4576} 4577 4578define <8 x 
i32> @load_sext_8i16_to_8i32(<8 x i16> *%ptr) { 4579; SSE2-LABEL: load_sext_8i16_to_8i32: 4580; SSE2: # BB#0: # %entry 4581; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4582; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 4583; SSE2-NEXT: psrad $16, %xmm0 4584; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 4585; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 4586; SSE2-NEXT: psrad $16, %xmm1 4587; SSE2-NEXT: retq 4588; 4589; SSSE3-LABEL: load_sext_8i16_to_8i32: 4590; SSSE3: # BB#0: # %entry 4591; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4592; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3] 4593; SSSE3-NEXT: psrad $16, %xmm0 4594; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 4595; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3] 4596; SSSE3-NEXT: psrad $16, %xmm1 4597; SSSE3-NEXT: retq 4598; 4599; SSE41-LABEL: load_sext_8i16_to_8i32: 4600; SSE41: # BB#0: # %entry 4601; SSE41-NEXT: pmovsxwd (%rdi), %xmm0 4602; SSE41-NEXT: pmovsxwd 8(%rdi), %xmm1 4603; SSE41-NEXT: retq 4604; 4605; AVX1-LABEL: load_sext_8i16_to_8i32: 4606; AVX1: # BB#0: # %entry 4607; AVX1-NEXT: vpmovsxwd (%rdi), %xmm0 4608; AVX1-NEXT: vpmovsxwd 8(%rdi), %xmm1 4609; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4610; AVX1-NEXT: retq 4611; 4612; AVX2-LABEL: load_sext_8i16_to_8i32: 4613; AVX2: # BB#0: # %entry 4614; AVX2-NEXT: vpmovsxwd (%rdi), %ymm0 4615; AVX2-NEXT: retq 4616; 4617; AVX512-LABEL: load_sext_8i16_to_8i32: 4618; AVX512: # BB#0: # %entry 4619; AVX512-NEXT: vpmovsxwd (%rdi), %ymm0 4620; AVX512-NEXT: retq 4621; 4622; X32-SSE41-LABEL: load_sext_8i16_to_8i32: 4623; X32-SSE41: # BB#0: # %entry 4624; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4625; X32-SSE41-NEXT: pmovsxwd (%eax), %xmm0 4626; X32-SSE41-NEXT: pmovsxwd 8(%eax), %xmm1 4627; X32-SSE41-NEXT: retl 4628entry: 4629 %X = load <8 x i16>, <8 x i16>* %ptr 4630 %Y = sext <8 x i16> %X to <8 x i32> 4631 ret <8 x i32> %Y 4632} 4633 4634define <2 x i64> @load_sext_2i32_to_2i64(<2 x i32> *%ptr) { 
4635; SSE2-LABEL: load_sext_2i32_to_2i64: 4636; SSE2: # BB#0: # %entry 4637; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4638; SSE2-NEXT: movdqa %xmm0, %xmm1 4639; SSE2-NEXT: psrad $31, %xmm1 4640; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4641; SSE2-NEXT: retq 4642; 4643; SSSE3-LABEL: load_sext_2i32_to_2i64: 4644; SSSE3: # BB#0: # %entry 4645; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 4646; SSSE3-NEXT: movdqa %xmm0, %xmm1 4647; SSSE3-NEXT: psrad $31, %xmm1 4648; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 4649; SSSE3-NEXT: retq 4650; 4651; SSE41-LABEL: load_sext_2i32_to_2i64: 4652; SSE41: # BB#0: # %entry 4653; SSE41-NEXT: pmovsxdq (%rdi), %xmm0 4654; SSE41-NEXT: retq 4655; 4656; AVX-LABEL: load_sext_2i32_to_2i64: 4657; AVX: # BB#0: # %entry 4658; AVX-NEXT: vpmovsxdq (%rdi), %xmm0 4659; AVX-NEXT: retq 4660; 4661; X32-SSE41-LABEL: load_sext_2i32_to_2i64: 4662; X32-SSE41: # BB#0: # %entry 4663; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4664; X32-SSE41-NEXT: pmovsxdq (%eax), %xmm0 4665; X32-SSE41-NEXT: retl 4666entry: 4667 %X = load <2 x i32>, <2 x i32>* %ptr 4668 %Y = sext <2 x i32> %X to <2 x i64> 4669 ret <2 x i64> %Y 4670} 4671 4672define <4 x i64> @load_sext_4i32_to_4i64(<4 x i32> *%ptr) { 4673; SSE2-LABEL: load_sext_4i32_to_4i64: 4674; SSE2: # BB#0: # %entry 4675; SSE2-NEXT: movdqa (%rdi), %xmm0 4676; SSE2-NEXT: movdqa %xmm0, %xmm2 4677; SSE2-NEXT: psrad $31, %xmm2 4678; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 4679; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 4680; SSE2-NEXT: movdqa %xmm1, %xmm2 4681; SSE2-NEXT: psrad $31, %xmm2 4682; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4683; SSE2-NEXT: retq 4684; 4685; SSSE3-LABEL: load_sext_4i32_to_4i64: 4686; SSSE3: # BB#0: # %entry 4687; SSSE3-NEXT: movdqa (%rdi), %xmm0 4688; SSSE3-NEXT: movdqa %xmm0, %xmm2 4689; SSSE3-NEXT: psrad $31, %xmm2 4690; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 4691; 
SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 4692; SSSE3-NEXT: movdqa %xmm1, %xmm2 4693; SSSE3-NEXT: psrad $31, %xmm2 4694; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 4695; SSSE3-NEXT: retq 4696; 4697; SSE41-LABEL: load_sext_4i32_to_4i64: 4698; SSE41: # BB#0: # %entry 4699; SSE41-NEXT: pmovsxdq (%rdi), %xmm0 4700; SSE41-NEXT: pmovsxdq 8(%rdi), %xmm1 4701; SSE41-NEXT: retq 4702; 4703; AVX1-LABEL: load_sext_4i32_to_4i64: 4704; AVX1: # BB#0: # %entry 4705; AVX1-NEXT: vpmovsxdq (%rdi), %xmm0 4706; AVX1-NEXT: vpmovsxdq 8(%rdi), %xmm1 4707; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 4708; AVX1-NEXT: retq 4709; 4710; AVX2-LABEL: load_sext_4i32_to_4i64: 4711; AVX2: # BB#0: # %entry 4712; AVX2-NEXT: vpmovsxdq (%rdi), %ymm0 4713; AVX2-NEXT: retq 4714; 4715; AVX512-LABEL: load_sext_4i32_to_4i64: 4716; AVX512: # BB#0: # %entry 4717; AVX512-NEXT: vpmovsxdq (%rdi), %ymm0 4718; AVX512-NEXT: retq 4719; 4720; X32-SSE41-LABEL: load_sext_4i32_to_4i64: 4721; X32-SSE41: # BB#0: # %entry 4722; X32-SSE41-NEXT: movl {{[0-9]+}}(%esp), %eax 4723; X32-SSE41-NEXT: pmovsxdq (%eax), %xmm0 4724; X32-SSE41-NEXT: pmovsxdq 8(%eax), %xmm1 4725; X32-SSE41-NEXT: retl 4726entry: 4727 %X = load <4 x i32>, <4 x i32>* %ptr 4728 %Y = sext <4 x i32> %X to <4 x i64> 4729 ret <4 x i64> %Y 4730} 4731 4732define i32 @sext_2i8_to_i32(<16 x i8> %A) nounwind uwtable readnone ssp { 4733; SSE2-LABEL: sext_2i8_to_i32: 4734; SSE2: # BB#0: # %entry 4735; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4736; SSE2-NEXT: psraw $8, %xmm0 4737; SSE2-NEXT: movd %xmm0, %eax 4738; SSE2-NEXT: retq 4739; 4740; SSSE3-LABEL: sext_2i8_to_i32: 4741; SSSE3: # BB#0: # %entry 4742; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7] 4743; SSSE3-NEXT: psraw $8, %xmm0 4744; SSSE3-NEXT: movd %xmm0, %eax 4745; SSSE3-NEXT: retq 4746; 4747; SSE41-LABEL: sext_2i8_to_i32: 4748; SSE41: # BB#0: # %entry 4749; SSE41-NEXT: pmovsxbw %xmm0, 
%xmm0
; SSE41-NEXT: movd %xmm0, %eax
; SSE41-NEXT: retq
;
; AVX-LABEL: sext_2i8_to_i32:
; AVX: # BB#0: # %entry
; AVX-NEXT: vpmovsxbw %xmm0, %xmm0
; AVX-NEXT: vmovd %xmm0, %eax
; AVX-NEXT: retq
;
; X32-SSE41-LABEL: sext_2i8_to_i32:
; X32-SSE41: # BB#0: # %entry
; X32-SSE41-NEXT: pushl %eax
; X32-SSE41-NEXT: .Lcfi0:
; X32-SSE41-NEXT: .cfi_def_cfa_offset 8
; X32-SSE41-NEXT: pmovsxbw %xmm0, %xmm0
; X32-SSE41-NEXT: movd %xmm0, %eax
; X32-SSE41-NEXT: popl %ecx
; X32-SSE41-NEXT: retl
entry:
 %Shuf = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1>
 %Ex = sext <2 x i8> %Shuf to <2 x i16>
 %Bc = bitcast <2 x i16> %Ex to i32
 ret i32 %Bc
}

; sext <4 x i1> -> <4 x i64>: the i1 mask bits live in the low bit of each
; 32-bit lane, so codegen splats them with pslld $31 + psrad $31, then widens
; each i32 lane to i64 (pmovsxdq on SSE4.1+, punpckldq with a psrad $31
; sign-half on SSE2/SSSE3; AVX2/AVX-512 do the widen in one ymm vpmovsxdq).
define <4 x i64> @sext_4i1_to_4i64(<4 x i1> %mask) {
; SSE2-LABEL: sext_4i1_to_4i64:
; SSE2: # BB#0:
; SSE2-NEXT: pslld $31, %xmm0
; SSE2-NEXT: psrad $31, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_4i1_to_4i64:
; SSSE3: # BB#0:
; SSSE3-NEXT: pslld $31, %xmm0
; SSSE3-NEXT: psrad $31, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_4i1_to_4i64:
; SSE41: # BB#0:
; SSE41-NEXT: pslld $31, %xmm0
; SSE41-NEXT: psrad $31, %xmm0
; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovsxdq %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_4i1_to_4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vpslld $31, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_4i1_to_4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpslld $31, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: sext_4i1_to_4i64:
; AVX512: # BB#0:
; AVX512-NEXT: vpslld $31, %xmm0, %xmm0
; AVX512-NEXT: vpsrad $31, %xmm0, %xmm0
; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: sext_4i1_to_4i64:
; X32-SSE41: # BB#0:
; X32-SSE41-NEXT: pslld $31, %xmm0
; X32-SSE41-NEXT: psrad $31, %xmm0
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1
; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
; X32-SSE41-NEXT: retl
 %extmask = sext <4 x i1> %mask to <4 x i64>
 ret <4 x i64> %extmask
}

; sext <4 x i8> -> <4 x i64>: the i8 values live in the low byte of each
; 32-bit lane, so codegen sign-fills each i32 with pslld $24 + psrad $24,
; then widens i32 -> i64 exactly as in sext_4i1_to_4i64 above.
define <4 x i64> @sext_4i8_to_4i64(<4 x i8> %mask) {
; SSE2-LABEL: sext_4i8_to_4i64:
; SSE2: # BB#0:
; SSE2-NEXT: pslld $24, %xmm0
; SSE2-NEXT: psrad $24, %xmm0
; SSE2-NEXT: movdqa %xmm0, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSE2-NEXT: movdqa %xmm1, %xmm2
; SSE2-NEXT: psrad $31, %xmm2
; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE2-NEXT: retq
;
; SSSE3-LABEL: sext_4i8_to_4i64:
; SSSE3: # BB#0:
; SSSE3-NEXT: pslld $24, %xmm0
; SSSE3-NEXT: psrad $24, %xmm0
; SSSE3-NEXT: movdqa %xmm0, %xmm2
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1]
; SSSE3-NEXT: movdqa %xmm1, %xmm2
; SSSE3-NEXT: psrad $31, %xmm2
; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSSE3-NEXT: retq
;
; SSE41-LABEL: sext_4i8_to_4i64:
; SSE41: # BB#0:
; SSE41-NEXT: pslld $24, %xmm0
; SSE41-NEXT: psrad $24, %xmm0
; SSE41-NEXT: pmovsxdq %xmm0, %xmm2
; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; SSE41-NEXT: pmovsxdq %xmm0, %xmm1
; SSE41-NEXT: movdqa %xmm2, %xmm0
; SSE41-NEXT: retq
;
; AVX1-LABEL: sext_4i8_to_4i64:
; AVX1: # BB#0:
; AVX1-NEXT: vpslld $24, %xmm0, %xmm0
; AVX1-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm1
; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; AVX1-NEXT: vpmovsxdq %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_4i8_to_4i64:
; AVX2: # BB#0:
; AVX2-NEXT: vpslld $24, %xmm0, %xmm0
; AVX2-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX2-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX2-NEXT: retq
;
; AVX512-LABEL: sext_4i8_to_4i64:
; AVX512: # BB#0:
; AVX512-NEXT: vpslld $24, %xmm0, %xmm0
; AVX512-NEXT: vpsrad $24, %xmm0, %xmm0
; AVX512-NEXT: vpmovsxdq %xmm0, %ymm0
; AVX512-NEXT: retq
;
; X32-SSE41-LABEL: sext_4i8_to_4i64:
; X32-SSE41: # BB#0:
; X32-SSE41-NEXT: pslld $24, %xmm0
; X32-SSE41-NEXT: psrad $24, %xmm0
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm2
; X32-SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1]
; X32-SSE41-NEXT: pmovsxdq %xmm0, %xmm1
; X32-SSE41-NEXT: movdqa %xmm2, %xmm0
; X32-SSE41-NEXT: retl
 %extmask = sext <4 x i8> %mask to <4 x i64>
 ret <4 x i64> %extmask
}

; icmp eq <32 x i16> + sext <32 x i1> -> <32 x i8>: the vector compare
; already yields all-ones/all-zeros 16-bit lanes, so the sext folds into
; the compare followed by a saturating pack (packsswb) or, on AVX-512BW,
; a mask-register compare and vpmovwb truncate.
define <32 x i8> @sext_32xi1_to_32xi8(<32 x i16> %c1, <32 x i16> %c2)nounwind {
; SSE-LABEL: sext_32xi1_to_32xi8:
; SSE: # BB#0:
; SSE-NEXT: pcmpeqw %xmm5, %xmm1
; SSE-NEXT: pcmpeqw %xmm4, %xmm0
; SSE-NEXT: packsswb %xmm1, %xmm0
; SSE-NEXT: pcmpeqw %xmm7, %xmm3
; SSE-NEXT: pcmpeqw %xmm6, %xmm2
; SSE-NEXT: packsswb %xmm3, %xmm2
; SSE-NEXT: movdqa %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: sext_32xi1_to_32xi8:
; AVX1: # BB#0:
; AVX1-NEXT: vextractf128 $1, %ymm3, %xmm4
; AVX1-NEXT: vextractf128 $1, %ymm1, %xmm5
; AVX1-NEXT: vpcmpeqw %xmm4, %xmm5, %xmm4
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm1, %xmm1
; AVX1-NEXT: vpacksswb %xmm4, %xmm1, %xmm1
; AVX1-NEXT: vextractf128 $1, %ymm2, %xmm3
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm4
; AVX1-NEXT: vpcmpeqw %xmm3, %xmm4, %xmm3
; AVX1-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm0
; AVX1-NEXT: vpacksswb %xmm3, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: sext_32xi1_to_32xi8:
; AVX2: # BB#0:
; AVX2-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
; AVX2-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; AVX2-NEXT: vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT: vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT: retq
;
; AVX512F-LABEL: sext_32xi1_to_32xi8:
; AVX512F: # BB#0:
; AVX512F-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm0
; AVX512F-NEXT: vpmovsxwd %ymm0, %zmm0
; AVX512F-NEXT: vpmovdb %zmm0, %xmm0
; AVX512F-NEXT: vpcmpeqw %ymm3, %ymm1, %ymm1
; AVX512F-NEXT: vpmovsxwd %ymm1, %zmm1
; AVX512F-NEXT: vpmovdb %zmm1, %xmm1
; AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX512F-NEXT: retq
;
; AVX512BW-LABEL: sext_32xi1_to_32xi8:
; AVX512BW: # BB#0:
; AVX512BW-NEXT: vpcmpeqw %zmm1, %zmm0, %k1
; AVX512BW-NEXT: vpternlogd $255, %zmm0, %zmm0, %zmm0
; AVX512BW-NEXT: vmovdqu16 %zmm0, %zmm0 {%k1} {z}
; AVX512BW-NEXT: vpmovwb %zmm0, %ymm0
; AVX512BW-NEXT: retq
;
; X32-SSE41-LABEL: sext_32xi1_to_32xi8:
; X32-SSE41: # BB#0:
; X32-SSE41-NEXT: pushl %ebp
; X32-SSE41-NEXT: movl %esp, %ebp
; X32-SSE41-NEXT: andl $-16, %esp
; X32-SSE41-NEXT: subl $16, %esp
; X32-SSE41-NEXT: movdqa 8(%ebp), %xmm3
; X32-SSE41-NEXT: pcmpeqw 40(%ebp), %xmm1
; X32-SSE41-NEXT: pcmpeqw 24(%ebp), %xmm0
; X32-SSE41-NEXT: packsswb %xmm1, %xmm0
; X32-SSE41-NEXT: pcmpeqw 72(%ebp), %xmm3
; X32-SSE41-NEXT: pcmpeqw 56(%ebp), %xmm2
; X32-SSE41-NEXT: packsswb %xmm3, %xmm2
; X32-SSE41-NEXT: movdqa %xmm2, %xmm1
; X32-SSE41-NEXT: movl %ebp, %esp
; X32-SSE41-NEXT: popl %ebp
; X32-SSE41-NEXT: retl
 %a = icmp eq <32 x i16> %c1, %c2
 %b = sext <32 x i1> %a to <32 x i8>
 ret <32 x i8> %b
}