1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE --check-prefix=SSSE3 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.1 | FileCheck %s --check-prefix=SSE --check-prefix=SSE41 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 6; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 7; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512F 8; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=AVX --check-prefix=AVX512 --check-prefix=AVX512BW 9 10define <8 x i16> @zext_16i8_to_8i16(<16 x i8> %A) nounwind uwtable readnone ssp { 11; SSE2-LABEL: zext_16i8_to_8i16: 12; SSE2: # BB#0: # %entry 13; SSE2-NEXT: pxor %xmm1, %xmm1 14; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 15; SSE2-NEXT: retq 16; 17; SSSE3-LABEL: zext_16i8_to_8i16: 18; SSSE3: # BB#0: # %entry 19; SSSE3-NEXT: pxor %xmm1, %xmm1 20; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 21; SSSE3-NEXT: retq 22; 23; SSE41-LABEL: zext_16i8_to_8i16: 24; SSE41: # BB#0: # %entry 25; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 26; SSE41-NEXT: retq 27; 28; AVX-LABEL: zext_16i8_to_8i16: 29; AVX: # BB#0: # %entry 30; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 31; AVX-NEXT: retq 32entry: 33 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 34 %C = zext <8 x i8> %B to <8 x i16> 35 ret <8 x i16> %C 36} 37 38; PR17654 39define <16 x i16> @zext_16i8_to_16i16(<16 x i8> %A) { 40; SSE2-LABEL: zext_16i8_to_16i16: 41; SSE2: # BB#0: # %entry 42; SSE2-NEXT: movdqa %xmm0, %xmm1 43; SSE2-NEXT: pxor %xmm2, %xmm2 44; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 45; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 46; SSE2-NEXT: retq 47; 48; SSSE3-LABEL: zext_16i8_to_16i16: 49; SSSE3: # BB#0: # %entry 50; SSSE3-NEXT: movdqa %xmm0, %xmm1 51; SSSE3-NEXT: pxor %xmm2, %xmm2 52; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 53; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 54; SSSE3-NEXT: retq 55; 56; SSE41-LABEL: zext_16i8_to_16i16: 57; SSE41: # BB#0: # %entry 58; SSE41-NEXT: movdqa %xmm0, %xmm1 59; SSE41-NEXT: pxor %xmm2, %xmm2 60; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 61; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 62; SSE41-NEXT: retq 63; 64; AVX1-LABEL: zext_16i8_to_16i16: 65; AVX1: # BB#0: # %entry 66; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 67; 
AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 68; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 69; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 70; AVX1-NEXT: retq 71; 72; AVX2-LABEL: zext_16i8_to_16i16: 73; AVX2: # BB#0: # %entry 74; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 75; AVX2-NEXT: retq 76; 77; AVX512-LABEL: zext_16i8_to_16i16: 78; AVX512: # BB#0: # %entry 79; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 80; AVX512-NEXT: retq 81entry: 82 %B = zext <16 x i8> %A to <16 x i16> 83 ret <16 x i16> %B 84} 85 86define <32 x i16> @zext_32i8_to_32i16(<32 x i8> %A) { 87; SSE2-LABEL: zext_32i8_to_32i16: 88; SSE2: # BB#0: # %entry 89; SSE2-NEXT: movdqa %xmm1, %xmm3 90; SSE2-NEXT: movdqa %xmm0, %xmm1 91; SSE2-NEXT: pxor %xmm4, %xmm4 92; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 93; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15] 94; SSE2-NEXT: movdqa %xmm3, %xmm2 95; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 96; SSE2-NEXT: punpckhbw {{.*#+}} xmm3 = 
xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15] 97; SSE2-NEXT: retq 98; 99; SSSE3-LABEL: zext_32i8_to_32i16: 100; SSSE3: # BB#0: # %entry 101; SSSE3-NEXT: movdqa %xmm1, %xmm3 102; SSSE3-NEXT: movdqa %xmm0, %xmm1 103; SSSE3-NEXT: pxor %xmm4, %xmm4 104; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 105; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15] 106; SSSE3-NEXT: movdqa %xmm3, %xmm2 107; SSSE3-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3],xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7] 108; SSSE3-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15] 109; SSSE3-NEXT: retq 110; 111; SSE41-LABEL: zext_32i8_to_32i16: 112; SSE41: # BB#0: # %entry 113; SSE41-NEXT: movdqa %xmm1, %xmm3 114; SSE41-NEXT: movdqa %xmm0, %xmm1 115; SSE41-NEXT: pxor %xmm4, %xmm4 116; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero 117; SSE41-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm4[8],xmm1[9],xmm4[9],xmm1[10],xmm4[10],xmm1[11],xmm4[11],xmm1[12],xmm4[12],xmm1[13],xmm4[13],xmm1[14],xmm4[14],xmm1[15],xmm4[15] 118; SSE41-NEXT: pmovzxbw {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero 119; SSE41-NEXT: punpckhbw {{.*#+}} xmm3 = xmm3[8],xmm4[8],xmm3[9],xmm4[9],xmm3[10],xmm4[10],xmm3[11],xmm4[11],xmm3[12],xmm4[12],xmm3[13],xmm4[13],xmm3[14],xmm4[14],xmm3[15],xmm4[15] 120; SSE41-NEXT: retq 121; 122; AVX1-LABEL: 
zext_32i8_to_32i16: 123; AVX1: # BB#0: # %entry 124; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 125; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm2 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 126; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 127; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 128; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 129; AVX1-NEXT: vpunpckhbw {{.*#+}} xmm1 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15] 130; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 131; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 132; AVX1-NEXT: vmovaps %ymm2, %ymm0 133; AVX1-NEXT: retq 134; 135; AVX2-LABEL: zext_32i8_to_32i16: 136; AVX2: # BB#0: # %entry 137; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 138; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 139; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 140; AVX2-NEXT: vmovdqa %ymm2, %ymm0 141; AVX2-NEXT: retq 142; 143; AVX512F-LABEL: zext_32i8_to_32i16: 144; AVX512F: # BB#0: # %entry 145; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 146; AVX512F-NEXT: 
vextracti128 $1, %ymm0, %xmm0 147; AVX512F-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero 148; AVX512F-NEXT: vmovdqa %ymm2, %ymm0 149; AVX512F-NEXT: retq 150; 151; AVX512BW-LABEL: zext_32i8_to_32i16: 152; AVX512BW: # BB#0: # %entry 153; AVX512BW-NEXT: vpmovzxbw {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero 154; AVX512BW-NEXT: retq 155entry: 156 %B = zext <32 x i8> %A to <32 x i16> 157 ret <32 x i16> %B 158} 159 160define <4 x i32> @zext_16i8_to_4i32(<16 x i8> %A) nounwind uwtable readnone ssp { 161; SSE2-LABEL: zext_16i8_to_4i32: 162; SSE2: # BB#0: # %entry 163; SSE2-NEXT: pxor %xmm1, %xmm1 164; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 165; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 166; SSE2-NEXT: retq 167; 168; SSSE3-LABEL: zext_16i8_to_4i32: 169; SSSE3: # BB#0: # %entry 170; SSSE3-NEXT: pxor %xmm1, %xmm1 171; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 172; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 173; SSSE3-NEXT: retq 174; 175; SSE41-LABEL: zext_16i8_to_4i32: 176; SSE41: # BB#0: # %entry 177; SSE41-NEXT: 
pmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 178; SSE41-NEXT: retq 179; 180; AVX-LABEL: zext_16i8_to_4i32: 181; AVX: # BB#0: # %entry 182; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 183; AVX-NEXT: retq 184entry: 185 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 186 %C = zext <4 x i8> %B to <4 x i32> 187 ret <4 x i32> %C 188} 189 190define <8 x i32> @zext_16i8_to_8i32(<16 x i8> %A) nounwind uwtable readnone ssp { 191; SSE2-LABEL: zext_16i8_to_8i32: 192; SSE2: # BB#0: # %entry 193; SSE2-NEXT: movdqa %xmm0, %xmm1 194; SSE2-NEXT: pxor %xmm2, %xmm2 195; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 196; SSE2-NEXT: movdqa %xmm1, %xmm0 197; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 198; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 199; SSE2-NEXT: retq 200; 201; SSSE3-LABEL: zext_16i8_to_8i32: 202; SSSE3: # BB#0: # %entry 203; SSSE3-NEXT: movdqa %xmm0, %xmm1 204; SSSE3-NEXT: pxor %xmm2, %xmm2 205; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 206; SSSE3-NEXT: movdqa %xmm1, %xmm0 207; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 208; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 209; SSSE3-NEXT: retq 210; 211; SSE41-LABEL: zext_16i8_to_8i32: 212; SSE41: # BB#0: # %entry 213; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 214; SSE41-NEXT: 
pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 215; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 216; SSE41-NEXT: movdqa %xmm2, %xmm0 217; SSE41-NEXT: retq 218; 219; AVX1-LABEL: zext_16i8_to_8i32: 220; AVX1: # BB#0: # %entry 221; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 222; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 223; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 224; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 225; AVX1-NEXT: retq 226; 227; AVX2-LABEL: zext_16i8_to_8i32: 228; AVX2: # BB#0: # %entry 229; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 230; AVX2-NEXT: retq 231; 232; AVX512-LABEL: zext_16i8_to_8i32: 233; AVX512: # BB#0: # %entry 234; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 235; AVX512-NEXT: retq 236entry: 237 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 238 %C = zext <8 x i8> %B to <8 x i32> 239 ret <8 x i32> %C 240} 241 242define <16 x i32> @zext_16i8_to_16i32(<16 x i8> %A) nounwind uwtable readnone ssp { 243; SSE2-LABEL: zext_16i8_to_16i32: 244; SSE2: # BB#0: # %entry 245; SSE2-NEXT: movdqa %xmm0, %xmm1 246; SSE2-NEXT: pxor %xmm4, %xmm4 247; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] 248; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 249; SSE2-NEXT: 
movdqa %xmm1, %xmm0 250; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 251; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 252; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 253; SSE2-NEXT: movdqa %xmm3, %xmm2 254; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 255; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 256; SSE2-NEXT: retq 257; 258; SSSE3-LABEL: zext_16i8_to_16i32: 259; SSSE3: # BB#0: # %entry 260; SSSE3-NEXT: movdqa %xmm0, %xmm1 261; SSSE3-NEXT: pxor %xmm4, %xmm4 262; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] 263; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 264; SSSE3-NEXT: movdqa %xmm1, %xmm0 265; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 266; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 267; SSSE3-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 268; SSSE3-NEXT: movdqa %xmm3, %xmm2 269; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 270; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 271; SSSE3-NEXT: retq 272; 273; SSE41-LABEL: zext_16i8_to_16i32: 274; SSE41: # BB#0: # %entry 275; SSE41-NEXT: pmovzxbd {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 276; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 277; 
SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 278; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 279; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero 280; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 281; SSE41-NEXT: pmovzxbd {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 282; SSE41-NEXT: movdqa %xmm4, %xmm0 283; SSE41-NEXT: retq 284; 285; AVX1-LABEL: zext_16i8_to_16i32: 286; AVX1: # BB#0: # %entry 287; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 288; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] 289; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero,xmm2[2],zero,zero,zero,xmm2[3],zero,zero,zero 290; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 291; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 292; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 293; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 294; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 295; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 296; AVX1-NEXT: vmovaps %ymm2, %ymm0 297; AVX1-NEXT: retq 298; 299; AVX2-LABEL: zext_16i8_to_16i32: 300; AVX2: # BB#0: # %entry 301; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 302; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 303; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm1 = 
xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 304; AVX2-NEXT: vmovdqa %ymm2, %ymm0 305; AVX2-NEXT: retq 306; 307; AVX512-LABEL: zext_16i8_to_16i32: 308; AVX512: # BB#0: # %entry 309; AVX512-NEXT: vpmovzxbd {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero 310; AVX512-NEXT: retq 311entry: 312 %B = zext <16 x i8> %A to <16 x i32> 313 ret <16 x i32> %B 314} 315 316define <2 x i64> @zext_16i8_to_2i64(<16 x i8> %A) nounwind uwtable readnone ssp { 317; SSE2-LABEL: zext_16i8_to_2i64: 318; SSE2: # BB#0: # %entry 319; SSE2-NEXT: pxor %xmm1, %xmm1 320; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 321; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 322; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 323; SSE2-NEXT: retq 324; 325; SSSE3-LABEL: zext_16i8_to_2i64: 326; SSSE3: # BB#0: # %entry 327; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 328; SSSE3-NEXT: retq 329; 330; SSE41-LABEL: zext_16i8_to_2i64: 331; SSE41: # BB#0: # %entry 332; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 333; SSE41-NEXT: retq 334; 335; AVX-LABEL: zext_16i8_to_2i64: 336; AVX: # BB#0: # %entry 337; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = 
xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 338; AVX-NEXT: retq 339entry: 340 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <2 x i32> <i32 0, i32 1> 341 %C = zext <2 x i8> %B to <2 x i64> 342 ret <2 x i64> %C 343} 344 345define <4 x i64> @zext_16i8_to_4i64(<16 x i8> %A) nounwind uwtable readnone ssp { 346; SSE2-LABEL: zext_16i8_to_4i64: 347; SSE2: # BB#0: # %entry 348; SSE2-NEXT: movdqa %xmm0, %xmm1 349; SSE2-NEXT: pxor %xmm2, %xmm2 350; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 351; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 352; SSE2-NEXT: movdqa %xmm1, %xmm0 353; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 354; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 355; SSE2-NEXT: retq 356; 357; SSSE3-LABEL: zext_16i8_to_4i64: 358; SSSE3: # BB#0: # %entry 359; SSSE3-NEXT: movdqa %xmm0, %xmm1 360; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 361; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero 362; SSSE3-NEXT: retq 363; 364; SSE41-LABEL: zext_16i8_to_4i64: 365; SSE41: # BB#0: # %entry 366; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 367; SSE41-NEXT: psrld $16, %xmm0 368; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 369; SSE41-NEXT: movdqa %xmm2, %xmm0 370; SSE41-NEXT: retq 371; 372; AVX1-LABEL: zext_16i8_to_4i64: 373; AVX1: # BB#0: # %entry 374; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 375; AVX1-NEXT: vpsrld $16, %xmm0, 
%xmm0 376; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 377; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 378; AVX1-NEXT: retq 379; 380; AVX2-LABEL: zext_16i8_to_4i64: 381; AVX2: # BB#0: # %entry 382; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 383; AVX2-NEXT: retq 384; 385; AVX512-LABEL: zext_16i8_to_4i64: 386; AVX512: # BB#0: # %entry 387; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 388; AVX512-NEXT: retq 389entry: 390 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 391 %C = zext <4 x i8> %B to <4 x i64> 392 ret <4 x i64> %C 393} 394 395define <8 x i64> @zext_16i8_to_8i64(<16 x i8> %A) nounwind uwtable readnone ssp { 396; SSE2-LABEL: zext_16i8_to_8i64: 397; SSE2: # BB#0: # %entry 398; SSE2-NEXT: movdqa %xmm0, %xmm1 399; SSE2-NEXT: pxor %xmm4, %xmm4 400; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3] 401; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 402; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 403; SSE2-NEXT: movdqa %xmm1, %xmm0 404; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 405; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 406; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 407; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = 
xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] 408; SSE2-NEXT: movdqa %xmm3, %xmm2 409; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 410; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 411; SSE2-NEXT: retq 412; 413; SSSE3-LABEL: zext_16i8_to_8i64: 414; SSSE3: # BB#0: # %entry 415; SSSE3-NEXT: movdqa %xmm0, %xmm1 416; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128] 417; SSSE3-NEXT: pshufb %xmm4, %xmm0 418; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128] 419; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3] 420; SSSE3-NEXT: pshufb %xmm5, %xmm1 421; SSSE3-NEXT: movdqa %xmm3, %xmm2 422; SSSE3-NEXT: pshufb %xmm4, %xmm2 423; SSSE3-NEXT: pshufb %xmm5, %xmm3 424; SSSE3-NEXT: retq 425; 426; SSE41-LABEL: zext_16i8_to_8i64: 427; SSE41: # BB#0: # %entry 428; SSE41-NEXT: pmovzxbq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 429; SSE41-NEXT: movdqa %xmm0, %xmm1 430; SSE41-NEXT: psrld $16, %xmm1 431; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 432; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] 433; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero 434; SSE41-NEXT: psrlq $48, %xmm0 435; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 436; SSE41-NEXT: movdqa %xmm4, %xmm0 437; SSE41-NEXT: retq 438; 439; AVX1-LABEL: zext_16i8_to_8i64: 440; AVX1: # BB#0: # %entry 441; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 442; AVX1-NEXT: vpsrld $16, %xmm0, %xmm2 443; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = 
xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero 444; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 445; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 446; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 447; AVX1-NEXT: vpsrlq $48, %xmm0, %xmm0 448; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 449; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 450; AVX1-NEXT: vmovaps %ymm2, %ymm0 451; AVX1-NEXT: retq 452; 453; AVX2-LABEL: zext_16i8_to_8i64: 454; AVX2: # BB#0: # %entry 455; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 456; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 457; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 458; AVX2-NEXT: vmovdqa %ymm2, %ymm0 459; AVX2-NEXT: retq 460; 461; AVX512F-LABEL: zext_16i8_to_8i64: 462; AVX512F: # BB#0: # %entry 463; AVX512F-NEXT: vpmovzxbq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 464; AVX512F-NEXT: retq 465; 466; AVX512BW-LABEL: zext_16i8_to_8i64: 467; AVX512BW: # BB#0: # %entry 468; AVX512BW-NEXT: # kill: %XMM0<def> %XMM0<kill> %ZMM0<def> 469; AVX512BW-NEXT: vpmovzxbq {{.*#+}} zmm0 = 
xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 470; AVX512BW-NEXT: retq 471entry: 472 %B = shufflevector <16 x i8> %A, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 473 %C = zext <8 x i8> %B to <8 x i64> 474 ret <8 x i64> %C 475} 476 477define <4 x i32> @zext_8i16_to_4i32(<8 x i16> %A) nounwind uwtable readnone ssp { 478; SSE2-LABEL: zext_8i16_to_4i32: 479; SSE2: # BB#0: # %entry 480; SSE2-NEXT: pxor %xmm1, %xmm1 481; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 482; SSE2-NEXT: retq 483; 484; SSSE3-LABEL: zext_8i16_to_4i32: 485; SSSE3: # BB#0: # %entry 486; SSSE3-NEXT: pxor %xmm1, %xmm1 487; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 488; SSSE3-NEXT: retq 489; 490; SSE41-LABEL: zext_8i16_to_4i32: 491; SSE41: # BB#0: # %entry 492; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 493; SSE41-NEXT: retq 494; 495; AVX-LABEL: zext_8i16_to_4i32: 496; AVX: # BB#0: # %entry 497; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 498; AVX-NEXT: retq 499entry: 500 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 501 %C = zext <4 x i16> %B to <4 x i32> 502 ret <4 x i32> %C 503} 504 505define <8 x i32> @zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 506; SSE2-LABEL: zext_8i16_to_8i32: 507; SSE2: # BB#0: # %entry 508; SSE2-NEXT: movdqa %xmm0, %xmm1 509; SSE2-NEXT: pxor %xmm2, %xmm2 510; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 511; SSE2-NEXT: 
punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 512; SSE2-NEXT: retq 513; 514; SSSE3-LABEL: zext_8i16_to_8i32: 515; SSSE3: # BB#0: # %entry 516; SSSE3-NEXT: movdqa %xmm0, %xmm1 517; SSSE3-NEXT: pxor %xmm2, %xmm2 518; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 519; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 520; SSSE3-NEXT: retq 521; 522; SSE41-LABEL: zext_8i16_to_8i32: 523; SSE41: # BB#0: # %entry 524; SSE41-NEXT: movdqa %xmm0, %xmm1 525; SSE41-NEXT: pxor %xmm2, %xmm2 526; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 527; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 528; SSE41-NEXT: retq 529; 530; AVX1-LABEL: zext_8i16_to_8i32: 531; AVX1: # BB#0: # %entry 532; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 533; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 534; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 535; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 536; AVX1-NEXT: retq 537; 538; AVX2-LABEL: zext_8i16_to_8i32: 539; AVX2: # BB#0: # %entry 540; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 541; AVX2-NEXT: retq 542; 543; AVX512-LABEL: zext_8i16_to_8i32: 544; AVX512: # BB#0: # %entry 545; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 546; AVX512-NEXT: retq 547entry: 548 %B = zext <8 x i16> %A to <8 x i32> 549 ret <8 x i32>%B 550} 551 552define <16 x i32> @zext_16i16_to_16i32(<16 x i16> %A) nounwind uwtable readnone ssp { 553; SSE2-LABEL: zext_16i16_to_16i32: 554; SSE2: # BB#0: # %entry 555; SSE2-NEXT: movdqa %xmm1, %xmm3 556; 
SSE2-NEXT: movdqa %xmm0, %xmm1 557; SSE2-NEXT: pxor %xmm4, %xmm4 558; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 559; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 560; SSE2-NEXT: movdqa %xmm3, %xmm2 561; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 562; SSE2-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 563; SSE2-NEXT: retq 564; 565; SSSE3-LABEL: zext_16i16_to_16i32: 566; SSSE3: # BB#0: # %entry 567; SSSE3-NEXT: movdqa %xmm1, %xmm3 568; SSSE3-NEXT: movdqa %xmm0, %xmm1 569; SSSE3-NEXT: pxor %xmm4, %xmm4 570; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3] 571; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 572; SSSE3-NEXT: movdqa %xmm3, %xmm2 573; SSSE3-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1],xmm2[2],xmm4[2],xmm2[3],xmm4[3] 574; SSSE3-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 575; SSSE3-NEXT: retq 576; 577; SSE41-LABEL: zext_16i16_to_16i32: 578; SSE41: # BB#0: # %entry 579; SSE41-NEXT: movdqa %xmm1, %xmm3 580; SSE41-NEXT: movdqa %xmm0, %xmm1 581; SSE41-NEXT: pxor %xmm4, %xmm4 582; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 583; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 584; SSE41-NEXT: pmovzxwd {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero 585; SSE41-NEXT: punpckhwd {{.*#+}} xmm3 = xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 586; SSE41-NEXT: retq 587; 588; AVX1-LABEL: zext_16i16_to_16i32: 589; AVX1: # BB#0: # %entry 590; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 591; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm2 = 
xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 592; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 593; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 594; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 595; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 596; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 597; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 598; AVX1-NEXT: vmovaps %ymm2, %ymm0 599; AVX1-NEXT: retq 600; 601; AVX2-LABEL: zext_16i16_to_16i32: 602; AVX2: # BB#0: # %entry 603; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 604; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 605; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 606; AVX2-NEXT: vmovdqa %ymm2, %ymm0 607; AVX2-NEXT: retq 608; 609; AVX512-LABEL: zext_16i16_to_16i32: 610; AVX512: # BB#0: # %entry 611; AVX512-NEXT: vpmovzxwd {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero 612; AVX512-NEXT: retq 613entry: 614 %B = zext <16 x i16> %A to <16 x i32> 615 ret <16 x i32> %B 616} 617 618define <2 x i64> @zext_8i16_to_2i64(<8 x i16> %A) nounwind uwtable readnone ssp { 619; SSE2-LABEL: zext_8i16_to_2i64: 620; SSE2: # BB#0: # %entry 621; SSE2-NEXT: pxor %xmm1, %xmm1 622; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 623; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 624; SSE2-NEXT: retq 625; 626; SSSE3-LABEL: zext_8i16_to_2i64: 627; SSSE3: # BB#0: # %entry 628; SSSE3-NEXT: pxor %xmm1, %xmm1 629; SSSE3-NEXT: punpcklwd {{.*#+}} 
xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 630; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 631; SSSE3-NEXT: retq 632; 633; SSE41-LABEL: zext_8i16_to_2i64: 634; SSE41: # BB#0: # %entry 635; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 636; SSE41-NEXT: retq 637; 638; AVX-LABEL: zext_8i16_to_2i64: 639; AVX: # BB#0: # %entry 640; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 641; AVX-NEXT: retq 642entry: 643 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <2 x i32> <i32 0, i32 1> 644 %C = zext <2 x i16> %B to <2 x i64> 645 ret <2 x i64> %C 646} 647 648define <4 x i64> @zext_8i16_to_4i64(<8 x i16> %A) nounwind uwtable readnone ssp { 649; SSE2-LABEL: zext_8i16_to_4i64: 650; SSE2: # BB#0: # %entry 651; SSE2-NEXT: movdqa %xmm0, %xmm1 652; SSE2-NEXT: pxor %xmm2, %xmm2 653; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 654; SSE2-NEXT: movdqa %xmm1, %xmm0 655; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 656; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 657; SSE2-NEXT: retq 658; 659; SSSE3-LABEL: zext_8i16_to_4i64: 660; SSSE3: # BB#0: # %entry 661; SSSE3-NEXT: movdqa %xmm0, %xmm1 662; SSSE3-NEXT: pxor %xmm2, %xmm2 663; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 664; SSSE3-NEXT: movdqa %xmm1, %xmm0 665; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 666; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 667; SSSE3-NEXT: retq 668; 669; SSE41-LABEL: zext_8i16_to_4i64: 670; SSE41: # BB#0: # %entry 671; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 672; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 673; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 674; SSE41-NEXT: movdqa 
%xmm2, %xmm0 675; SSE41-NEXT: retq 676; 677; AVX1-LABEL: zext_8i16_to_4i64: 678; AVX1: # BB#0: # %entry 679; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 680; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 681; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 682; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 683; AVX1-NEXT: retq 684; 685; AVX2-LABEL: zext_8i16_to_4i64: 686; AVX2: # BB#0: # %entry 687; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 688; AVX2-NEXT: retq 689; 690; AVX512-LABEL: zext_8i16_to_4i64: 691; AVX512: # BB#0: # %entry 692; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 693; AVX512-NEXT: retq 694entry: 695 %B = shufflevector <8 x i16> %A, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> 696 %C = zext <4 x i16> %B to <4 x i64> 697 ret <4 x i64> %C 698} 699 700define <8 x i64> @zext_8i16_to_8i64(<8 x i16> %A) nounwind uwtable readnone ssp { 701; SSE2-LABEL: zext_8i16_to_8i64: 702; SSE2: # BB#0: # %entry 703; SSE2-NEXT: movdqa %xmm0, %xmm1 704; SSE2-NEXT: pxor %xmm4, %xmm4 705; SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] 706; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 707; SSE2-NEXT: movdqa %xmm1, %xmm0 708; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 709; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 710; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] 711; SSE2-NEXT: movdqa %xmm3, %xmm2 712; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 713; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 714; SSE2-NEXT: retq 715; 716; SSSE3-LABEL: zext_8i16_to_8i64: 717; SSSE3: # BB#0: # %entry 718; SSSE3-NEXT: 
movdqa %xmm0, %xmm1 719; SSSE3-NEXT: pxor %xmm4, %xmm4 720; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[2,3,0,1] 721; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 722; SSSE3-NEXT: movdqa %xmm1, %xmm0 723; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 724; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 725; SSSE3-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] 726; SSSE3-NEXT: movdqa %xmm3, %xmm2 727; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 728; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 729; SSSE3-NEXT: retq 730; 731; SSE41-LABEL: zext_8i16_to_8i64: 732; SSE41: # BB#0: # %entry 733; SSE41-NEXT: pmovzxwq {{.*#+}} xmm4 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 734; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 735; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 736; SSE41-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 737; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 738; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 739; SSE41-NEXT: pmovzxwq {{.*#+}} xmm3 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 740; SSE41-NEXT: movdqa %xmm4, %xmm0 741; SSE41-NEXT: retq 742; 743; AVX1-LABEL: zext_8i16_to_8i64: 744; AVX1: # BB#0: # %entry 745; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 746; AVX1-NEXT: vpshufd {{.*#+}} xmm2 = xmm0[1,1,2,3] 747; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,xmm2[1],zero,zero,zero 748; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm2 749; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 750; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 751; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[3,1,2,3] 752; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 
753; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm1 754; AVX1-NEXT: vmovaps %ymm2, %ymm0 755; AVX1-NEXT: retq 756; 757; AVX2-LABEL: zext_8i16_to_8i64: 758; AVX2: # BB#0: # %entry 759; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 760; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 761; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 762; AVX2-NEXT: vmovdqa %ymm2, %ymm0 763; AVX2-NEXT: retq 764; 765; AVX512-LABEL: zext_8i16_to_8i64: 766; AVX512: # BB#0: # %entry 767; AVX512-NEXT: vpmovzxwq {{.*#+}} zmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 768; AVX512-NEXT: retq 769entry: 770 %B = zext <8 x i16> %A to <8 x i64> 771 ret <8 x i64> %B 772} 773 774define <2 x i64> @zext_4i32_to_2i64(<4 x i32> %A) nounwind uwtable readnone ssp { 775; SSE2-LABEL: zext_4i32_to_2i64: 776; SSE2: # BB#0: # %entry 777; SSE2-NEXT: pxor %xmm1, %xmm1 778; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 779; SSE2-NEXT: retq 780; 781; SSSE3-LABEL: zext_4i32_to_2i64: 782; SSSE3: # BB#0: # %entry 783; SSSE3-NEXT: pxor %xmm1, %xmm1 784; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 785; SSSE3-NEXT: retq 786; 787; SSE41-LABEL: zext_4i32_to_2i64: 788; SSE41: # BB#0: # %entry 789; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 790; SSE41-NEXT: retq 791; 792; AVX-LABEL: zext_4i32_to_2i64: 793; AVX: # BB#0: # %entry 794; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 795; AVX-NEXT: retq 796entry: 797 %B = shufflevector <4 x i32> %A, <4 x i32> undef, <2 x i32> <i32 0, i32 1> 798 %C = zext <2 x i32> %B to <2 x i64> 799 ret <2 x i64> %C 800} 801 802define <4 x i64> @zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone 
ssp { 803; SSE2-LABEL: zext_4i32_to_4i64: 804; SSE2: # BB#0: # %entry 805; SSE2-NEXT: movdqa %xmm0, %xmm1 806; SSE2-NEXT: pxor %xmm2, %xmm2 807; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 808; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 809; SSE2-NEXT: retq 810; 811; SSSE3-LABEL: zext_4i32_to_4i64: 812; SSSE3: # BB#0: # %entry 813; SSSE3-NEXT: movdqa %xmm0, %xmm1 814; SSSE3-NEXT: pxor %xmm2, %xmm2 815; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 816; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 817; SSSE3-NEXT: retq 818; 819; SSE41-LABEL: zext_4i32_to_4i64: 820; SSE41: # BB#0: # %entry 821; SSE41-NEXT: movdqa %xmm0, %xmm1 822; SSE41-NEXT: pxor %xmm2, %xmm2 823; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 824; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 825; SSE41-NEXT: retq 826; 827; AVX1-LABEL: zext_4i32_to_4i64: 828; AVX1: # BB#0: # %entry 829; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 830; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 831; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 832; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 833; AVX1-NEXT: retq 834; 835; AVX2-LABEL: zext_4i32_to_4i64: 836; AVX2: # BB#0: # %entry 837; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 838; AVX2-NEXT: retq 839; 840; AVX512-LABEL: zext_4i32_to_4i64: 841; AVX512: # BB#0: # %entry 842; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 843; AVX512-NEXT: retq 844entry: 845 %B = zext <4 x i32> %A to <4 x i64> 846 ret <4 x i64>%B 847} 848 849define <8 x i64> @zext_8i32_to_8i64(<8 x i32> %A) nounwind uwtable readnone ssp { 850; SSE2-LABEL: zext_8i32_to_8i64: 851; SSE2: # BB#0: # %entry 852; SSE2-NEXT: movdqa %xmm1, %xmm3 853; SSE2-NEXT: movdqa %xmm0, %xmm1 854; SSE2-NEXT: pxor %xmm4, %xmm4 855; SSE2-NEXT: punpckldq 
{{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 856; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 857; SSE2-NEXT: movdqa %xmm3, %xmm2 858; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 859; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 860; SSE2-NEXT: retq 861; 862; SSSE3-LABEL: zext_8i32_to_8i64: 863; SSSE3: # BB#0: # %entry 864; SSSE3-NEXT: movdqa %xmm1, %xmm3 865; SSSE3-NEXT: movdqa %xmm0, %xmm1 866; SSSE3-NEXT: pxor %xmm4, %xmm4 867; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 868; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 869; SSSE3-NEXT: movdqa %xmm3, %xmm2 870; SSSE3-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 871; SSSE3-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 872; SSSE3-NEXT: retq 873; 874; SSE41-LABEL: zext_8i32_to_8i64: 875; SSE41: # BB#0: # %entry 876; SSE41-NEXT: movdqa %xmm1, %xmm3 877; SSE41-NEXT: movdqa %xmm0, %xmm1 878; SSE41-NEXT: pxor %xmm4, %xmm4 879; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 880; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 881; SSE41-NEXT: pmovzxdq {{.*#+}} xmm2 = xmm3[0],zero,xmm3[1],zero 882; SSE41-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 883; SSE41-NEXT: retq 884; 885; AVX1-LABEL: zext_8i32_to_8i64: 886; AVX1: # BB#0: # %entry 887; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 888; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 889; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm3 = xmm0[0],zero,xmm0[1],zero 890; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm3, %ymm2 891; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 892; AVX1-NEXT: vpunpckhdq {{.*#+}} xmm1 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 893; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero 894; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm1 895; AVX1-NEXT: vmovaps %ymm2, %ymm0 896; AVX1-NEXT: retq 897; 898; AVX2-LABEL: zext_8i32_to_8i64: 
899; AVX2: # BB#0: # %entry 900; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 901; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 902; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm1 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 903; AVX2-NEXT: vmovdqa %ymm2, %ymm0 904; AVX2-NEXT: retq 905; 906; AVX512-LABEL: zext_8i32_to_8i64: 907; AVX512: # BB#0: # %entry 908; AVX512-NEXT: vpmovzxdq {{.*#+}} zmm0 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero 909; AVX512-NEXT: retq 910entry: 911 %B = zext <8 x i32> %A to <8 x i64> 912 ret <8 x i64>%B 913} 914 915define <2 x i64> @load_zext_2i8_to_2i64(<2 x i8> *%ptr) { 916; SSE2-LABEL: load_zext_2i8_to_2i64: 917; SSE2: # BB#0: # %entry 918; SSE2-NEXT: movzwl (%rdi), %eax 919; SSE2-NEXT: movd %eax, %xmm0 920; SSE2-NEXT: pxor %xmm1, %xmm1 921; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 922; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 923; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 924; SSE2-NEXT: retq 925; 926; SSSE3-LABEL: load_zext_2i8_to_2i64: 927; SSSE3: # BB#0: # %entry 928; SSSE3-NEXT: movzwl (%rdi), %eax 929; SSSE3-NEXT: movd %eax, %xmm0 930; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 931; SSSE3-NEXT: retq 932; 933; SSE41-LABEL: load_zext_2i8_to_2i64: 934; SSE41: # BB#0: # %entry 935; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 936; SSE41-NEXT: retq 937; 938; AVX-LABEL: load_zext_2i8_to_2i64: 939; AVX: # BB#0: # %entry 940; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 941; AVX-NEXT: retq 942entry: 943 %X = 
load <2 x i8>, <2 x i8>* %ptr 944 %Y = zext <2 x i8> %X to <2 x i64> 945 ret <2 x i64> %Y 946} 947 948define <4 x i32> @load_zext_4i8_to_4i32(<4 x i8> *%ptr) { 949; SSE2-LABEL: load_zext_4i8_to_4i32: 950; SSE2: # BB#0: # %entry 951; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 952; SSE2-NEXT: pxor %xmm1, %xmm1 953; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 954; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 955; SSE2-NEXT: retq 956; 957; SSSE3-LABEL: load_zext_4i8_to_4i32: 958; SSSE3: # BB#0: # %entry 959; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 960; SSSE3-NEXT: pxor %xmm1, %xmm1 961; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 962; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 963; SSSE3-NEXT: retq 964; 965; SSE41-LABEL: load_zext_4i8_to_4i32: 966; SSE41: # BB#0: # %entry 967; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 968; SSE41-NEXT: retq 969; 970; AVX-LABEL: load_zext_4i8_to_4i32: 971; AVX: # BB#0: # %entry 972; AVX-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 973; AVX-NEXT: retq 974entry: 975 %X = load <4 x i8>, <4 x i8>* %ptr 976 %Y = zext <4 x i8> %X to <4 x i32> 977 ret <4 x i32> %Y 978} 979 980define <4 x i64> @load_zext_4i8_to_4i64(<4 x i8> *%ptr) { 981; SSE2-LABEL: load_zext_4i8_to_4i64: 982; SSE2: # BB#0: # %entry 983; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 984; SSE2-NEXT: pxor %xmm2, %xmm2 985; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = 
xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 986; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 987; SSE2-NEXT: movdqa %xmm1, %xmm0 988; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 989; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 990; SSE2-NEXT: retq 991; 992; SSSE3-LABEL: load_zext_4i8_to_4i64: 993; SSSE3: # BB#0: # %entry 994; SSSE3-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 995; SSSE3-NEXT: movdqa %xmm1, %xmm0 996; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 997; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[2],zero,zero,zero,zero,zero,zero,zero,xmm1[3],zero,zero,zero,zero,zero,zero,zero 998; SSSE3-NEXT: retq 999; 1000; SSE41-LABEL: load_zext_4i8_to_4i64: 1001; SSE41: # BB#0: # %entry 1002; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1003; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1004; SSE41-NEXT: retq 1005; 1006; AVX1-LABEL: load_zext_4i8_to_4i64: 1007; AVX1: # BB#0: # %entry 1008; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1009; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1010; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1011; AVX1-NEXT: retq 1012; 1013; AVX2-LABEL: load_zext_4i8_to_4i64: 1014; AVX2: # BB#0: # %entry 1015; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1016; AVX2-NEXT: retq 1017; 1018; AVX512-LABEL: 
load_zext_4i8_to_4i64: 1019; AVX512: # BB#0: # %entry 1020; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1021; AVX512-NEXT: retq 1022entry: 1023 %X = load <4 x i8>, <4 x i8>* %ptr 1024 %Y = zext <4 x i8> %X to <4 x i64> 1025 ret <4 x i64> %Y 1026} 1027 1028define <8 x i16> @load_zext_8i8_to_8i16(<8 x i8> *%ptr) { 1029; SSE2-LABEL: load_zext_8i8_to_8i16: 1030; SSE2: # BB#0: # %entry 1031; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1032; SSE2-NEXT: pxor %xmm1, %xmm1 1033; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1034; SSE2-NEXT: retq 1035; 1036; SSSE3-LABEL: load_zext_8i8_to_8i16: 1037; SSSE3: # BB#0: # %entry 1038; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1039; SSSE3-NEXT: pxor %xmm1, %xmm1 1040; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1041; SSSE3-NEXT: retq 1042; 1043; SSE41-LABEL: load_zext_8i8_to_8i16: 1044; SSE41: # BB#0: # %entry 1045; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1046; SSE41-NEXT: retq 1047; 1048; AVX-LABEL: load_zext_8i8_to_8i16: 1049; AVX: # BB#0: # %entry 1050; AVX-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1051; AVX-NEXT: retq 1052entry: 1053 %X = load <8 x i8>, <8 x i8>* %ptr 1054 %Y = zext <8 x i8> %X to <8 x i16> 1055 ret <8 x i16> %Y 1056} 1057 1058define <8 x i32> @load_zext_8i8_to_8i32(<8 x i8> *%ptr) { 1059; SSE2-LABEL: load_zext_8i8_to_8i32: 1060; SSE2: # BB#0: # %entry 1061; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1062; SSE2-NEXT: pxor %xmm2, %xmm2 
1063; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1064; SSE2-NEXT: movdqa %xmm1, %xmm0 1065; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1066; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1067; SSE2-NEXT: retq 1068; 1069; SSSE3-LABEL: load_zext_8i8_to_8i32: 1070; SSSE3: # BB#0: # %entry 1071; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1072; SSSE3-NEXT: pxor %xmm2, %xmm2 1073; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1074; SSSE3-NEXT: movdqa %xmm1, %xmm0 1075; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1076; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1077; SSSE3-NEXT: retq 1078; 1079; SSE41-LABEL: load_zext_8i8_to_8i32: 1080; SSE41: # BB#0: # %entry 1081; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1082; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1083; SSE41-NEXT: retq 1084; 1085; AVX1-LABEL: load_zext_8i8_to_8i32: 1086; AVX1: # BB#0: # %entry 1087; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1088; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1089; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1090; AVX1-NEXT: retq 1091; 1092; AVX2-LABEL: load_zext_8i8_to_8i32: 1093; AVX2: # BB#0: # %entry 1094; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = 
mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1095; AVX2-NEXT: retq 1096; 1097; AVX512-LABEL: load_zext_8i8_to_8i32: 1098; AVX512: # BB#0: # %entry 1099; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1100; AVX512-NEXT: retq 1101entry: 1102 %X = load <8 x i8>, <8 x i8>* %ptr 1103 %Y = zext <8 x i8> %X to <8 x i32> 1104 ret <8 x i32> %Y 1105} 1106 1107define <8 x i32> @load_zext_16i8_to_8i32(<16 x i8> *%ptr) { 1108; SSE2-LABEL: load_zext_16i8_to_8i32: 1109; SSE2: # BB#0: # %entry 1110; SSE2-NEXT: movdqa (%rdi), %xmm1 1111; SSE2-NEXT: pxor %xmm2, %xmm2 1112; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1113; SSE2-NEXT: movdqa %xmm1, %xmm0 1114; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1115; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1116; SSE2-NEXT: retq 1117; 1118; SSSE3-LABEL: load_zext_16i8_to_8i32: 1119; SSSE3: # BB#0: # %entry 1120; SSSE3-NEXT: movdqa (%rdi), %xmm1 1121; SSSE3-NEXT: pxor %xmm2, %xmm2 1122; SSSE3-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1123; SSSE3-NEXT: movdqa %xmm1, %xmm0 1124; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1125; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1126; SSSE3-NEXT: retq 1127; 1128; SSE41-LABEL: load_zext_16i8_to_8i32: 1129; SSE41: # BB#0: # %entry 1130; SSE41-NEXT: 
movdqa (%rdi), %xmm1 1131; SSE41-NEXT: pmovzxbd {{.*#+}} xmm0 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 1132; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm1[1,1,2,3] 1133; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero,xmm1[2],zero,zero,zero,xmm1[3],zero,zero,zero 1134; SSE41-NEXT: retq 1135; 1136; AVX1-LABEL: load_zext_16i8_to_8i32: 1137; AVX1: # BB#0: # %entry 1138; AVX1-NEXT: vmovdqa (%rdi), %xmm0 1139; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1140; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 1141; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1142; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1143; AVX1-NEXT: retq 1144; 1145; AVX2-LABEL: load_zext_16i8_to_8i32: 1146; AVX2: # BB#0: # %entry 1147; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1148; AVX2-NEXT: retq 1149; 1150; AVX512-LABEL: load_zext_16i8_to_8i32: 1151; AVX512: # BB#0: # %entry 1152; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero,mem[4],zero,zero,zero,mem[5],zero,zero,zero,mem[6],zero,zero,zero,mem[7],zero,zero,zero 1153; AVX512-NEXT: retq 1154entry: 1155 %X = load <16 x i8>, <16 x i8>* %ptr 1156 %Y = shufflevector <16 x i8> %X, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> 1157 %Z = zext <8 x i8> %Y to <8 x i32> 1158 ret <8 x i32> %Z 1159} 1160 1161define <8 x i64> @load_zext_8i8_to_8i64(<8 x i8> *%ptr) { 1162; SSE2-LABEL: load_zext_8i8_to_8i64: 1163; SSE2: # BB#0: # %entry 1164; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1165; SSE2-NEXT: pxor %xmm4, %xmm4 1166; 
SSE2-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3] 1167; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3],xmm1[4],xmm4[4],xmm1[5],xmm4[5],xmm1[6],xmm4[6],xmm1[7],xmm4[7] 1168; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 1169; SSE2-NEXT: movdqa %xmm1, %xmm0 1170; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1] 1171; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm4[2],xmm1[3],xmm4[3] 1172; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3],xmm3[4],xmm4[4],xmm3[5],xmm4[5],xmm3[6],xmm4[6],xmm3[7],xmm4[7] 1173; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1],xmm3[2],xmm4[2],xmm3[3],xmm4[3] 1174; SSE2-NEXT: movdqa %xmm3, %xmm2 1175; SSE2-NEXT: punpckldq {{.*#+}} xmm2 = xmm2[0],xmm4[0],xmm2[1],xmm4[1] 1176; SSE2-NEXT: punpckhdq {{.*#+}} xmm3 = xmm3[2],xmm4[2],xmm3[3],xmm4[3] 1177; SSE2-NEXT: retq 1178; 1179; SSSE3-LABEL: load_zext_8i8_to_8i64: 1180; SSSE3: # BB#0: # %entry 1181; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1182; SSSE3-NEXT: movdqa {{.*#+}} xmm4 = [0,128,128,128,128,128,128,128,1,128,128,128,128,128,128,128] 1183; SSSE3-NEXT: movdqa %xmm1, %xmm0 1184; SSSE3-NEXT: pshufb %xmm4, %xmm0 1185; SSSE3-NEXT: movdqa {{.*#+}} xmm5 = [2,128,128,128,128,128,128,128,3,128,128,128,128,128,128,128] 1186; SSSE3-NEXT: pshufd {{.*#+}} xmm3 = xmm1[1,1,2,3] 1187; SSSE3-NEXT: pshufb %xmm5, %xmm1 1188; SSSE3-NEXT: movdqa %xmm3, %xmm2 1189; SSSE3-NEXT: pshufb %xmm4, %xmm2 1190; SSSE3-NEXT: pshufb %xmm5, %xmm3 1191; SSSE3-NEXT: retq 1192; 1193; SSE41-LABEL: load_zext_8i8_to_8i64: 1194; SSE41: # BB#0: # %entry 1195; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1196; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1197; SSE41-NEXT: pmovzxbq 
{{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1198; SSE41-NEXT: pmovzxbq {{.*#+}} xmm3 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1199; SSE41-NEXT: retq 1200; 1201; AVX1-LABEL: load_zext_8i8_to_8i64: 1202; AVX1: # BB#0: # %entry 1203; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1204; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1205; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1206; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1207; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm2 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero 1208; AVX1-NEXT: vinsertf128 $1, %xmm2, %ymm1, %ymm1 1209; AVX1-NEXT: retq 1210; 1211; AVX2-LABEL: load_zext_8i8_to_8i64: 1212; AVX2: # BB#0: # %entry 1213; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1214; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm1 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero 1215; AVX2-NEXT: retq 1216; 1217; AVX512-LABEL: load_zext_8i8_to_8i64: 1218; AVX512: # BB#0: # %entry 1219; AVX512-NEXT: vpmovzxbq {{.*#+}} zmm0 = mem[0],zero,zero,zero,zero,zero,zero,zero,mem[1],zero,zero,zero,zero,zero,zero,zero,mem[2],zero,zero,zero,zero,zero,zero,zero,mem[3],zero,zero,zero,zero,zero,zero,zero,mem[4],zero,zero,zero,zero,zero,zero,zero,mem[5],zero,zero,zero,zero,zero,zero,zero,mem[6],zero,zero,zero,zero,zero,zero,zero,mem[7],zero,zero,zero,zero,zero,zero,zero 1220; AVX512-NEXT: retq 1221entry: 1222 %X = load <8 x 
i8>, <8 x i8>* %ptr 1223 %Y = zext <8 x i8> %X to <8 x i64> 1224 ret <8 x i64> %Y 1225} 1226 1227define <16 x i16> @load_zext_16i8_to_16i16(<16 x i8> *%ptr) { 1228; SSE2-LABEL: load_zext_16i8_to_16i16: 1229; SSE2: # BB#0: # %entry 1230; SSE2-NEXT: movdqa (%rdi), %xmm1 1231; SSE2-NEXT: pxor %xmm2, %xmm2 1232; SSE2-NEXT: movdqa %xmm1, %xmm0 1233; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1234; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1235; SSE2-NEXT: retq 1236; 1237; SSSE3-LABEL: load_zext_16i8_to_16i16: 1238; SSSE3: # BB#0: # %entry 1239; SSSE3-NEXT: movdqa (%rdi), %xmm1 1240; SSSE3-NEXT: pxor %xmm2, %xmm2 1241; SSSE3-NEXT: movdqa %xmm1, %xmm0 1242; SSSE3-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1243; SSSE3-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1244; SSSE3-NEXT: retq 1245; 1246; SSE41-LABEL: load_zext_16i8_to_16i16: 1247; SSE41: # BB#0: # %entry 1248; SSE41-NEXT: pmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1249; SSE41-NEXT: pmovzxbw {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1250; SSE41-NEXT: retq 1251; 1252; AVX1-LABEL: load_zext_16i8_to_16i16: 1253; AVX1: # BB#0: # %entry 1254; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1255; AVX1-NEXT: vpmovzxbw {{.*#+}} xmm1 = 
mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1256; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1257; AVX1-NEXT: retq 1258; 1259; AVX2-LABEL: load_zext_16i8_to_16i16: 1260; AVX2: # BB#0: # %entry 1261; AVX2-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1262; AVX2-NEXT: retq 1263; 1264; AVX512-LABEL: load_zext_16i8_to_16i16: 1265; AVX512: # BB#0: # %entry 1266; AVX512-NEXT: vpmovzxbw {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero,mem[8],zero,mem[9],zero,mem[10],zero,mem[11],zero,mem[12],zero,mem[13],zero,mem[14],zero,mem[15],zero 1267; AVX512-NEXT: retq 1268entry: 1269 %X = load <16 x i8>, <16 x i8>* %ptr 1270 %Y = zext <16 x i8> %X to <16 x i16> 1271 ret <16 x i16> %Y 1272} 1273 1274define <2 x i64> @load_zext_2i16_to_2i64(<2 x i16> *%ptr) { 1275; SSE2-LABEL: load_zext_2i16_to_2i64: 1276; SSE2: # BB#0: # %entry 1277; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1278; SSE2-NEXT: pxor %xmm1, %xmm1 1279; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1280; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1281; SSE2-NEXT: retq 1282; 1283; SSSE3-LABEL: load_zext_2i16_to_2i64: 1284; SSSE3: # BB#0: # %entry 1285; SSSE3-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 1286; SSSE3-NEXT: pxor %xmm1, %xmm1 1287; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1288; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1289; SSSE3-NEXT: retq 1290; 1291; SSE41-LABEL: load_zext_2i16_to_2i64: 1292; SSE41: # BB#0: # %entry 1293; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1294; SSE41-NEXT: retq 
1295; 1296; AVX-LABEL: load_zext_2i16_to_2i64: 1297; AVX: # BB#0: # %entry 1298; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1299; AVX-NEXT: retq 1300entry: 1301 %X = load <2 x i16>, <2 x i16>* %ptr 1302 %Y = zext <2 x i16> %X to <2 x i64> 1303 ret <2 x i64> %Y 1304} 1305 1306define <4 x i32> @load_zext_4i16_to_4i32(<4 x i16> *%ptr) { 1307; SSE2-LABEL: load_zext_4i16_to_4i32: 1308; SSE2: # BB#0: # %entry 1309; SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1310; SSE2-NEXT: pxor %xmm1, %xmm1 1311; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1312; SSE2-NEXT: retq 1313; 1314; SSSE3-LABEL: load_zext_4i16_to_4i32: 1315; SSSE3: # BB#0: # %entry 1316; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1317; SSSE3-NEXT: pxor %xmm1, %xmm1 1318; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1319; SSSE3-NEXT: retq 1320; 1321; SSE41-LABEL: load_zext_4i16_to_4i32: 1322; SSE41: # BB#0: # %entry 1323; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1324; SSE41-NEXT: retq 1325; 1326; AVX-LABEL: load_zext_4i16_to_4i32: 1327; AVX: # BB#0: # %entry 1328; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1329; AVX-NEXT: retq 1330entry: 1331 %X = load <4 x i16>, <4 x i16>* %ptr 1332 %Y = zext <4 x i16> %X to <4 x i32> 1333 ret <4 x i32> %Y 1334} 1335 1336define <4 x i64> @load_zext_4i16_to_4i64(<4 x i16> *%ptr) { 1337; SSE2-LABEL: load_zext_4i16_to_4i64: 1338; SSE2: # BB#0: # %entry 1339; SSE2-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1340; SSE2-NEXT: pxor %xmm2, %xmm2 1341; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1342; SSE2-NEXT: movdqa %xmm1, %xmm0 1343; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1344; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1345; SSE2-NEXT: retq 
1346; 1347; SSSE3-LABEL: load_zext_4i16_to_4i64: 1348; SSSE3: # BB#0: # %entry 1349; SSSE3-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 1350; SSSE3-NEXT: pxor %xmm2, %xmm2 1351; SSSE3-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1352; SSSE3-NEXT: movdqa %xmm1, %xmm0 1353; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1354; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1355; SSSE3-NEXT: retq 1356; 1357; SSE41-LABEL: load_zext_4i16_to_4i64: 1358; SSE41: # BB#0: # %entry 1359; SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1360; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1361; SSE41-NEXT: retq 1362; 1363; AVX1-LABEL: load_zext_4i16_to_4i64: 1364; AVX1: # BB#0: # %entry 1365; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1366; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = mem[0],zero,zero,zero,mem[1],zero,zero,zero 1367; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1368; AVX1-NEXT: retq 1369; 1370; AVX2-LABEL: load_zext_4i16_to_4i64: 1371; AVX2: # BB#0: # %entry 1372; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1373; AVX2-NEXT: retq 1374; 1375; AVX512-LABEL: load_zext_4i16_to_4i64: 1376; AVX512: # BB#0: # %entry 1377; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = mem[0],zero,zero,zero,mem[1],zero,zero,zero,mem[2],zero,zero,zero,mem[3],zero,zero,zero 1378; AVX512-NEXT: retq 1379entry: 1380 %X = load <4 x i16>, <4 x i16>* %ptr 1381 %Y = zext <4 x i16> %X to <4 x i64> 1382 ret <4 x i64> %Y 1383} 1384 1385define <8 x i32> @load_zext_8i16_to_8i32(<8 x i16> *%ptr) { 1386; SSE2-LABEL: load_zext_8i16_to_8i32: 1387; SSE2: # BB#0: # %entry 1388; SSE2-NEXT: movdqa (%rdi), %xmm1 1389; SSE2-NEXT: pxor %xmm2, %xmm2 1390; SSE2-NEXT: movdqa %xmm1, %xmm0 1391; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1392; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1393; SSE2-NEXT: retq 1394; 1395; SSSE3-LABEL: load_zext_8i16_to_8i32: 1396; SSSE3: # BB#0: # %entry 1397; SSSE3-NEXT: movdqa (%rdi), %xmm1 1398; SSSE3-NEXT: pxor %xmm2, %xmm2 1399; SSSE3-NEXT: movdqa %xmm1, %xmm0 1400; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1401; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1402; SSSE3-NEXT: retq 1403; 1404; SSE41-LABEL: load_zext_8i16_to_8i32: 1405; SSE41: # BB#0: # %entry 1406; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1407; SSE41-NEXT: pmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1408; SSE41-NEXT: retq 1409; 1410; AVX1-LABEL: load_zext_8i16_to_8i32: 1411; AVX1: # BB#0: # %entry 1412; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1413; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm1 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1414; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1415; AVX1-NEXT: retq 1416; 1417; AVX2-LABEL: load_zext_8i16_to_8i32: 1418; AVX2: # BB#0: # %entry 1419; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1420; AVX2-NEXT: retq 1421; 1422; AVX512-LABEL: load_zext_8i16_to_8i32: 1423; AVX512: # BB#0: # %entry 1424; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero,mem[4],zero,mem[5],zero,mem[6],zero,mem[7],zero 1425; AVX512-NEXT: retq 1426entry: 1427 %X = load <8 x i16>, <8 x i16>* %ptr 1428 %Y = zext <8 x i16> %X to <8 x i32> 1429 ret <8 x i32> %Y 1430} 1431 1432define <2 x i64> @load_zext_2i32_to_2i64(<2 x i32> *%ptr) { 1433; SSE2-LABEL: load_zext_2i32_to_2i64: 1434; SSE2: # BB#0: # %entry 1435; 
SSE2-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1436; SSE2-NEXT: pxor %xmm1, %xmm1 1437; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1438; SSE2-NEXT: retq 1439; 1440; SSSE3-LABEL: load_zext_2i32_to_2i64: 1441; SSSE3: # BB#0: # %entry 1442; SSSE3-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1443; SSSE3-NEXT: pxor %xmm1, %xmm1 1444; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1] 1445; SSSE3-NEXT: retq 1446; 1447; SSE41-LABEL: load_zext_2i32_to_2i64: 1448; SSE41: # BB#0: # %entry 1449; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1450; SSE41-NEXT: retq 1451; 1452; AVX-LABEL: load_zext_2i32_to_2i64: 1453; AVX: # BB#0: # %entry 1454; AVX-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1455; AVX-NEXT: retq 1456entry: 1457 %X = load <2 x i32>, <2 x i32>* %ptr 1458 %Y = zext <2 x i32> %X to <2 x i64> 1459 ret <2 x i64> %Y 1460} 1461 1462define <4 x i64> @load_zext_4i32_to_4i64(<4 x i32> *%ptr) { 1463; SSE2-LABEL: load_zext_4i32_to_4i64: 1464; SSE2: # BB#0: # %entry 1465; SSE2-NEXT: movdqa (%rdi), %xmm1 1466; SSE2-NEXT: pxor %xmm2, %xmm2 1467; SSE2-NEXT: movdqa %xmm1, %xmm0 1468; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1469; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1470; SSE2-NEXT: retq 1471; 1472; SSSE3-LABEL: load_zext_4i32_to_4i64: 1473; SSSE3: # BB#0: # %entry 1474; SSSE3-NEXT: movdqa (%rdi), %xmm1 1475; SSSE3-NEXT: pxor %xmm2, %xmm2 1476; SSSE3-NEXT: movdqa %xmm1, %xmm0 1477; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1478; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1479; SSSE3-NEXT: retq 1480; 1481; SSE41-LABEL: load_zext_4i32_to_4i64: 1482; SSE41: # BB#0: # %entry 1483; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1484; SSE41-NEXT: pmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero 1485; SSE41-NEXT: retq 1486; 1487; AVX1-LABEL: load_zext_4i32_to_4i64: 1488; AVX1: # BB#0: # %entry 
1489; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm0 = mem[0],zero,mem[1],zero 1490; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = mem[0],zero,mem[1],zero 1491; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1492; AVX1-NEXT: retq 1493; 1494; AVX2-LABEL: load_zext_4i32_to_4i64: 1495; AVX2: # BB#0: # %entry 1496; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1497; AVX2-NEXT: retq 1498; 1499; AVX512-LABEL: load_zext_4i32_to_4i64: 1500; AVX512: # BB#0: # %entry 1501; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = mem[0],zero,mem[1],zero,mem[2],zero,mem[3],zero 1502; AVX512-NEXT: retq 1503entry: 1504 %X = load <4 x i32>, <4 x i32>* %ptr 1505 %Y = zext <4 x i32> %X to <4 x i64> 1506 ret <4 x i64> %Y 1507} 1508 1509define <8 x i32> @zext_8i8_to_8i32(<8 x i8> %z) { 1510; SSE2-LABEL: zext_8i8_to_8i32: 1511; SSE2: # BB#0: # %entry 1512; SSE2-NEXT: movdqa %xmm0, %xmm1 1513; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1514; SSE2-NEXT: pxor %xmm2, %xmm2 1515; SSE2-NEXT: movdqa %xmm1, %xmm0 1516; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1517; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1518; SSE2-NEXT: retq 1519; 1520; SSSE3-LABEL: zext_8i8_to_8i32: 1521; SSSE3: # BB#0: # %entry 1522; SSSE3-NEXT: movdqa %xmm0, %xmm1 1523; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1 1524; SSSE3-NEXT: pxor %xmm2, %xmm2 1525; SSSE3-NEXT: movdqa %xmm1, %xmm0 1526; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1527; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1528; SSSE3-NEXT: retq 1529; 1530; SSE41-LABEL: zext_8i8_to_8i32: 1531; SSE41: # BB#0: # %entry 1532; SSE41-NEXT: movdqa %xmm0, %xmm1 1533; SSE41-NEXT: pand {{.*}}(%rip), %xmm1 1534; SSE41-NEXT: pxor %xmm2, %xmm2 1535; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 1536; SSE41-NEXT: 
punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1537; SSE41-NEXT: retq 1538; 1539; AVX1-LABEL: zext_8i8_to_8i32: 1540; AVX1: # BB#0: # %entry 1541; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 1542; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1543; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1544; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1545; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1546; AVX1-NEXT: retq 1547; 1548; AVX2-LABEL: zext_8i8_to_8i32: 1549; AVX2: # BB#0: # %entry 1550; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 1551; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1552; AVX2-NEXT: retq 1553; 1554; AVX512-LABEL: zext_8i8_to_8i32: 1555; AVX512: # BB#0: # %entry 1556; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 1557; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1558; AVX512-NEXT: retq 1559entry: 1560 %t = zext <8 x i8> %z to <8 x i32> 1561 ret <8 x i32> %t 1562} 1563 1564define <8 x i32> @shuf_zext_8i16_to_8i32(<8 x i16> %A) nounwind uwtable readnone ssp { 1565; SSE2-LABEL: shuf_zext_8i16_to_8i32: 1566; SSE2: # BB#0: # %entry 1567; SSE2-NEXT: movdqa %xmm0, %xmm1 1568; SSE2-NEXT: pxor %xmm2, %xmm2 1569; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1570; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1571; SSE2-NEXT: retq 1572; 1573; SSSE3-LABEL: shuf_zext_8i16_to_8i32: 1574; SSSE3: # BB#0: # %entry 1575; SSSE3-NEXT: movdqa %xmm0, %xmm1 1576; SSSE3-NEXT: pxor %xmm2, %xmm2 1577; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1578; SSSE3-NEXT: punpckhwd {{.*#+}} 
xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1579; SSSE3-NEXT: retq 1580; 1581; SSE41-LABEL: shuf_zext_8i16_to_8i32: 1582; SSE41: # BB#0: # %entry 1583; SSE41-NEXT: movdqa %xmm0, %xmm1 1584; SSE41-NEXT: pxor %xmm2, %xmm2 1585; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 1586; SSE41-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1587; SSE41-NEXT: retq 1588; 1589; AVX1-LABEL: shuf_zext_8i16_to_8i32: 1590; AVX1: # BB#0: # %entry 1591; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1592; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1593; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1594; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 1595; AVX1-NEXT: retq 1596; 1597; AVX2-LABEL: shuf_zext_8i16_to_8i32: 1598; AVX2: # BB#0: # %entry 1599; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1600; AVX2-NEXT: retq 1601; 1602; AVX512-LABEL: shuf_zext_8i16_to_8i32: 1603; AVX512: # BB#0: # %entry 1604; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1605; AVX512-NEXT: retq 1606entry: 1607 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 0, i32 8, i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8> 1608 %Z = bitcast <16 x i16> %B to <8 x i32> 1609 ret <8 x i32> %Z 1610} 1611 1612define <4 x i64> @shuf_zext_4i32_to_4i64(<4 x i32> %A) nounwind uwtable readnone ssp { 1613; SSE2-LABEL: shuf_zext_4i32_to_4i64: 1614; SSE2: # BB#0: # %entry 1615; SSE2-NEXT: movdqa %xmm0, %xmm1 1616; SSE2-NEXT: pxor %xmm2, %xmm2 1617; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1618; SSE2-NEXT: punpckhdq {{.*#+}} xmm1 
= xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1619; SSE2-NEXT: retq 1620; 1621; SSSE3-LABEL: shuf_zext_4i32_to_4i64: 1622; SSSE3: # BB#0: # %entry 1623; SSSE3-NEXT: movdqa %xmm0, %xmm1 1624; SSSE3-NEXT: pxor %xmm2, %xmm2 1625; SSSE3-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1] 1626; SSSE3-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1627; SSSE3-NEXT: retq 1628; 1629; SSE41-LABEL: shuf_zext_4i32_to_4i64: 1630; SSE41: # BB#0: # %entry 1631; SSE41-NEXT: movdqa %xmm0, %xmm1 1632; SSE41-NEXT: pxor %xmm2, %xmm2 1633; SSE41-NEXT: pmovzxdq {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero 1634; SSE41-NEXT: punpckhdq {{.*#+}} xmm1 = xmm1[2],xmm2[2],xmm1[3],xmm2[3] 1635; SSE41-NEXT: retq 1636; 1637; AVX1-LABEL: shuf_zext_4i32_to_4i64: 1638; AVX1: # BB#0: # %entry 1639; AVX1-NEXT: vpmovzxdq {{.*#+}} xmm1 = xmm0[0],zero,xmm0[1],zero 1640; AVX1-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1641; AVX1-NEXT: vblendpd {{.*#+}} xmm0 = xmm2[0],xmm0[1] 1642; AVX1-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,0,3,0] 1643; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1644; AVX1-NEXT: retq 1645; 1646; AVX2-LABEL: shuf_zext_4i32_to_4i64: 1647; AVX2: # BB#0: # %entry 1648; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1649; AVX2-NEXT: retq 1650; 1651; AVX512-LABEL: shuf_zext_4i32_to_4i64: 1652; AVX512: # BB#0: # %entry 1653; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1654; AVX512-NEXT: retq 1655entry: 1656 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <8 x i32> <i32 0, i32 4, i32 1, i32 4, i32 2, i32 4, i32 3, i32 4> 1657 %Z = bitcast <8 x i32> %B to <4 x i64> 1658 ret <4 x i64> %Z 1659} 1660 1661define <8 x i32> @shuf_zext_8i8_to_8i32(<8 x i8> %A) { 1662; SSE2-LABEL: shuf_zext_8i8_to_8i32: 1663; SSE2: # BB#0: # %entry 1664; SSE2-NEXT: movdqa %xmm0, %xmm1 1665; SSE2-NEXT: pand {{.*}}(%rip), %xmm1 1666; SSE2-NEXT: packuswb %xmm1, %xmm1 1667; SSE2-NEXT: pxor %xmm2, %xmm2 1668; SSE2-NEXT: 
punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1669; SSE2-NEXT: movdqa %xmm1, %xmm0 1670; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1671; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1672; SSE2-NEXT: retq 1673; 1674; SSSE3-LABEL: shuf_zext_8i8_to_8i32: 1675; SSSE3: # BB#0: # %entry 1676; SSSE3-NEXT: movdqa %xmm0, %xmm1 1677; SSSE3-NEXT: pand {{.*}}(%rip), %xmm1 1678; SSSE3-NEXT: pxor %xmm2, %xmm2 1679; SSSE3-NEXT: movdqa %xmm1, %xmm0 1680; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1681; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1682; SSSE3-NEXT: retq 1683; 1684; SSE41-LABEL: shuf_zext_8i8_to_8i32: 1685; SSE41: # BB#0: # %entry 1686; SSE41-NEXT: pshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1687; SSE41-NEXT: pmovzxbd {{.*#+}} xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1688; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 1689; SSE41-NEXT: pmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1690; SSE41-NEXT: movdqa %xmm2, %xmm0 1691; SSE41-NEXT: retq 1692; 1693; AVX1-LABEL: shuf_zext_8i8_to_8i32: 1694; AVX1: # BB#0: # %entry 1695; AVX1-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1696; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1697; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,1,2,3] 1698; AVX1-NEXT: vpmovzxbd {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1699; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1700; AVX1-NEXT: 
retq 1701; 1702; AVX2-LABEL: shuf_zext_8i8_to_8i32: 1703; AVX2: # BB#0: # %entry 1704; AVX2-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1705; AVX2-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1706; AVX2-NEXT: retq 1707; 1708; AVX512-LABEL: shuf_zext_8i8_to_8i32: 1709; AVX512: # BB#0: # %entry 1710; AVX512-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14,u,u,u,u,u,u,u,u] 1711; AVX512-NEXT: vpmovzxbd {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero 1712; AVX512-NEXT: retq 1713entry: 1714 %B = shufflevector <8 x i8> %A, <8 x i8> zeroinitializer, <32 x i32> <i32 0, i32 8, i32 8, i32 8, i32 1, i32 8, i32 8, i32 8, i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8, i32 6, i32 8, i32 8, i32 8, i32 7, i32 8, i32 8, i32 8> 1715 %Z = bitcast <32 x i8> %B to <8 x i32> 1716 ret <8 x i32> %Z 1717} 1718 1719define <2 x i64> @shuf_zext_16i8_to_2i64_offset6(<16 x i8> %A) nounwind uwtable readnone ssp { 1720; SSE2-LABEL: shuf_zext_16i8_to_2i64_offset6: 1721; SSE2: # BB#0: # %entry 1722; SSE2-NEXT: pxor %xmm1, %xmm1 1723; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1724; SSE2-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7] 1725; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1726; SSE2-NEXT: retq 1727; 1728; SSSE3-LABEL: shuf_zext_16i8_to_2i64_offset6: 1729; SSSE3: # BB#0: # %entry 1730; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = 
xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero 1731; SSSE3-NEXT: retq 1732; 1733; SSE41-LABEL: shuf_zext_16i8_to_2i64_offset6: 1734; SSE41: # BB#0: # %entry 1735; SSE41-NEXT: psrlq $48, %xmm0 1736; SSE41-NEXT: pmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1737; SSE41-NEXT: retq 1738; 1739; AVX-LABEL: shuf_zext_16i8_to_2i64_offset6: 1740; AVX: # BB#0: # %entry 1741; AVX-NEXT: vpsrlq $48, %xmm0, %xmm0 1742; AVX-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1743; AVX-NEXT: retq 1744entry: 1745 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <16 x i32> <i32 6, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 1746 %Z = bitcast <16 x i8> %B to <2 x i64> 1747 ret <2 x i64> %Z 1748} 1749 1750define <4 x i64> @shuf_zext_16i8_to_4i64_offset11(<16 x i8> %A) nounwind uwtable readnone ssp { 1751; SSE2-LABEL: shuf_zext_16i8_to_4i64_offset11: 1752; SSE2: # BB#0: # %entry 1753; SSE2-NEXT: movdqa %xmm0, %xmm1 1754; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero 1755; SSE2-NEXT: pxor %xmm2, %xmm2 1756; SSE2-NEXT: punpckhbw {{.*#+}} xmm1 = xmm1[8],xmm2[8],xmm1[9],xmm2[9],xmm1[10],xmm2[10],xmm1[11],xmm2[11],xmm1[12],xmm2[12],xmm1[13],xmm2[13],xmm1[14],xmm2[14],xmm1[15],xmm2[15] 1757; SSE2-NEXT: movdqa %xmm1, %xmm0 1758; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1759; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1760; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1761; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1762; SSE2-NEXT: retq 1763; 1764; SSSE3-LABEL: shuf_zext_16i8_to_4i64_offset11: 1765; SSSE3: # BB#0: # %entry 1766; SSSE3-NEXT: 
movdqa %xmm0, %xmm1 1767; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[11],zero,zero,zero,zero,zero,zero,zero,xmm0[12],zero,zero,zero,zero,zero,zero,zero 1768; SSSE3-NEXT: pshufb {{.*#+}} xmm1 = xmm1[13],zero,zero,zero,zero,zero,zero,zero,xmm1[14],zero,zero,zero,zero,zero,zero,zero 1769; SSSE3-NEXT: retq 1770; 1771; SSE41-LABEL: shuf_zext_16i8_to_4i64_offset11: 1772; SSE41: # BB#0: # %entry 1773; SSE41-NEXT: movdqa %xmm0, %xmm1 1774; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1775; SSE41-NEXT: pmovzxbq {{.*#+}} xmm2 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1776; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1777; SSE41-NEXT: pmovzxbq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1778; SSE41-NEXT: movdqa %xmm2, %xmm0 1779; SSE41-NEXT: retq 1780; 1781; AVX1-LABEL: shuf_zext_16i8_to_4i64_offset11: 1782; AVX1: # BB#0: # %entry 1783; AVX1-NEXT: vpsrldq {{.*#+}} xmm1 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1784; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero 1785; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1786; AVX1-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero 1787; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1788; AVX1-NEXT: retq 1789; 1790; AVX2-LABEL: shuf_zext_16i8_to_4i64_offset11: 1791; AVX2: # BB#0: # %entry 1792; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1793; AVX2-NEXT: vpmovzxbq {{.*#+}} ymm0 = 
xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 1794; AVX2-NEXT: retq 1795; 1796; AVX512-LABEL: shuf_zext_16i8_to_4i64_offset11: 1797; AVX512: # BB#0: # %entry 1798; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1799; AVX512-NEXT: vpmovzxbq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero 1800; AVX512-NEXT: retq 1801entry: 1802 %B = shufflevector <16 x i8> %A, <16 x i8> zeroinitializer, <32 x i32> <i32 11, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16, i32 16> 1803 %Z = bitcast <32 x i8> %B to <4 x i64> 1804 ret <4 x i64> %Z 1805} 1806 1807define <2 x i64> @shuf_zext_8i16_to_2i64_offset6(<8 x i16> %A) nounwind uwtable readnone ssp { 1808; SSE2-LABEL: shuf_zext_8i16_to_2i64_offset6: 1809; SSE2: # BB#0: # %entry 1810; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1811; SSE2-NEXT: pxor %xmm1, %xmm1 1812; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1813; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1814; SSE2-NEXT: retq 1815; 1816; SSSE3-LABEL: shuf_zext_8i16_to_2i64_offset6: 1817; SSSE3: # BB#0: # %entry 1818; SSSE3-NEXT: pshufb {{.*#+}} xmm0 = xmm0[6,7],zero,zero,zero,zero,zero,zero,xmm0[8,9],zero,zero,zero,zero,zero,zero 1819; SSSE3-NEXT: retq 1820; 1821; SSE41-LABEL: shuf_zext_8i16_to_2i64_offset6: 1822; SSE41: # BB#0: # %entry 1823; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1824; 
SSE41-NEXT: pmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1825; SSE41-NEXT: retq 1826; 1827; AVX-LABEL: shuf_zext_8i16_to_2i64_offset6: 1828; AVX: # BB#0: # %entry 1829; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1830; AVX-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1831; AVX-NEXT: retq 1832entry: 1833 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8> 1834 %Z = bitcast <8 x i16> %B to <2 x i64> 1835 ret <2 x i64> %Z 1836} 1837 1838define <4 x i64> @shuf_zext_8i16_to_4i64_offset2(<8 x i16> %A) nounwind uwtable readnone ssp { 1839; SSE2-LABEL: shuf_zext_8i16_to_4i64_offset2: 1840; SSE2: # BB#0: # %entry 1841; SSE2-NEXT: movdqa %xmm0, %xmm1 1842; SSE2-NEXT: pxor %xmm2, %xmm2 1843; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1844; SSE2-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1845; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1846; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1847; SSE2-NEXT: retq 1848; 1849; SSSE3-LABEL: shuf_zext_8i16_to_4i64_offset2: 1850; SSSE3: # BB#0: # %entry 1851; SSSE3-NEXT: movdqa %xmm0, %xmm1 1852; SSSE3-NEXT: pxor %xmm2, %xmm2 1853; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1854; SSSE3-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1855; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1856; SSSE3-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 1857; SSSE3-NEXT: retq 1858; 1859; SSE41-LABEL: shuf_zext_8i16_to_4i64_offset2: 1860; SSE41: # BB#0: # %entry 1861; SSE41-NEXT: pshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1862; SSE41-NEXT: pmovzxwq {{.*#+}} xmm2 = 
xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 1863; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1864; SSE41-NEXT: pmovzxwq {{.*#+}} xmm1 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1865; SSE41-NEXT: movdqa %xmm2, %xmm0 1866; SSE41-NEXT: retq 1867; 1868; AVX1-LABEL: shuf_zext_8i16_to_4i64_offset2: 1869; AVX1: # BB#0: # %entry 1870; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[1,1,2,3] 1871; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,xmm1[1],zero,zero,zero 1872; AVX1-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[2,3,0,1] 1873; AVX1-NEXT: vpmovzxwq {{.*#+}} xmm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero 1874; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1875; AVX1-NEXT: retq 1876; 1877; AVX2-LABEL: shuf_zext_8i16_to_4i64_offset2: 1878; AVX2: # BB#0: # %entry 1879; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] 1880; AVX2-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1881; AVX2-NEXT: retq 1882; 1883; AVX512-LABEL: shuf_zext_8i16_to_4i64_offset2: 1884; AVX512: # BB#0: # %entry 1885; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,2,3] 1886; AVX512-NEXT: vpmovzxwq {{.*#+}} ymm0 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero 1887; AVX512-NEXT: retq 1888entry: 1889 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 2, i32 8, i32 8, i32 8, i32 3, i32 8, i32 8, i32 8, i32 4, i32 8, i32 8, i32 8, i32 5, i32 8, i32 8, i32 8> 1890 %Z = bitcast <16 x i16> %B to <4 x i64> 1891 ret <4 x i64> %Z 1892} 1893 1894define <4 x i32> @shuf_zext_8i16_to_4i32_offset1(<8 x i16> %A) nounwind uwtable readnone ssp { 1895; SSE2-LABEL: shuf_zext_8i16_to_4i32_offset1: 1896; SSE2: # BB#0: # %entry 1897; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1898; SSE2-NEXT: pxor %xmm1, %xmm1 1899; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1900; 
SSE2-NEXT: retq 1901; 1902; SSSE3-LABEL: shuf_zext_8i16_to_4i32_offset1: 1903; SSSE3: # BB#0: # %entry 1904; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1905; SSSE3-NEXT: pxor %xmm1, %xmm1 1906; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3] 1907; SSSE3-NEXT: retq 1908; 1909; SSE41-LABEL: shuf_zext_8i16_to_4i32_offset1: 1910; SSE41: # BB#0: # %entry 1911; SSE41-NEXT: psrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1912; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1913; SSE41-NEXT: retq 1914; 1915; AVX-LABEL: shuf_zext_8i16_to_4i32_offset1: 1916; AVX: # BB#0: # %entry 1917; AVX-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zero,zero 1918; AVX-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 1919; AVX-NEXT: retq 1920entry: 1921 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <8 x i32> <i32 1, i32 8, i32 2, i32 8, i32 3, i32 8, i32 4, i32 8> 1922 %Z = bitcast <8 x i16> %B to <4 x i32> 1923 ret <4 x i32> %Z 1924} 1925 1926define <8 x i32> @shuf_zext_8i16_to_8i32_offset3(<8 x i16> %A) nounwind uwtable readnone ssp { 1927; SSE2-LABEL: shuf_zext_8i16_to_8i32_offset3: 1928; SSE2: # BB#0: # %entry 1929; SSE2-NEXT: movdqa %xmm0, %xmm1 1930; SSE2-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1931; SSE2-NEXT: pxor %xmm2, %xmm2 1932; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1933; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1934; SSE2-NEXT: retq 1935; 1936; SSSE3-LABEL: shuf_zext_8i16_to_8i32_offset3: 1937; SSSE3: # BB#0: # %entry 1938; SSSE3-NEXT: movdqa %xmm0, %xmm1 1939; SSSE3-NEXT: psrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 
1940; SSSE3-NEXT: pxor %xmm2, %xmm2 1941; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1942; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1943; SSSE3-NEXT: retq 1944; 1945; SSE41-LABEL: shuf_zext_8i16_to_8i32_offset3: 1946; SSE41: # BB#0: # %entry 1947; SSE41-NEXT: movdqa %xmm0, %xmm1 1948; SSE41-NEXT: pslldq {{.*#+}} xmm0 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1949; SSE41-NEXT: pxor %xmm2, %xmm2 1950; SSE41-NEXT: punpckhwd {{.*#+}} xmm0 = xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7] 1951; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1952; SSE41-NEXT: retq 1953; 1954; AVX1-LABEL: shuf_zext_8i16_to_8i32_offset3: 1955; AVX1: # BB#0: # %entry 1956; AVX1-NEXT: vpslldq {{.*#+}} xmm1 = zero,zero,xmm0[0,1,2,3,4,5,6,7,8,9,10,11,12,13] 1957; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 1958; AVX1-NEXT: vpunpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1959; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 1960; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1961; AVX1-NEXT: retq 1962; 1963; AVX2-LABEL: shuf_zext_8i16_to_8i32_offset3: 1964; AVX2: # BB#0: # %entry 1965; AVX2-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1966; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1967; AVX2-NEXT: retq 1968; 1969; AVX512-LABEL: shuf_zext_8i16_to_8i32_offset3: 1970; AVX512: # BB#0: # %entry 1971; AVX512-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero 1972; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = 
xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 1973; AVX512-NEXT: retq 1974entry: 1975 %B = shufflevector <8 x i16> %A, <8 x i16> zeroinitializer, <16 x i32> <i32 3, i32 8, i32 4, i32 8, i32 5, i32 8, i32 6, i32 8, i32 7, i32 8, i32 undef, i32 8, i32 undef, i32 8, i32 undef, i32 8> 1976 %Z = bitcast <16 x i16> %B to <8 x i32> 1977 ret <8 x i32> %Z 1978} 1979 1980define <8 x i32> @shuf_zext_16i16_to_8i32_offset8(<16 x i16> %A) nounwind uwtable readnone ssp { 1981; SSE2-LABEL: shuf_zext_16i16_to_8i32_offset8: 1982; SSE2: # BB#0: # %entry 1983; SSE2-NEXT: pxor %xmm2, %xmm2 1984; SSE2-NEXT: movdqa %xmm1, %xmm0 1985; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1986; SSE2-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1987; SSE2-NEXT: retq 1988; 1989; SSSE3-LABEL: shuf_zext_16i16_to_8i32_offset8: 1990; SSSE3: # BB#0: # %entry 1991; SSSE3-NEXT: pxor %xmm2, %xmm2 1992; SSSE3-NEXT: movdqa %xmm1, %xmm0 1993; SSSE3-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 1994; SSSE3-NEXT: punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7] 1995; SSSE3-NEXT: retq 1996; 1997; SSE41-LABEL: shuf_zext_16i16_to_8i32_offset8: 1998; SSE41: # BB#0: # %entry 1999; SSE41-NEXT: pshufd {{.*#+}} xmm0 = xmm1[2,2,3,3] 2000; SSE41-NEXT: pxor %xmm2, %xmm2 2001; SSE41-NEXT: pblendw {{.*#+}} xmm2 = xmm0[0],xmm2[1,2,3],xmm0[4],xmm2[5,6,7] 2002; SSE41-NEXT: pmovzxwd {{.*#+}} xmm0 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero 2003; SSE41-NEXT: movdqa %xmm2, %xmm1 2004; SSE41-NEXT: retq 2005; 2006; AVX1-LABEL: shuf_zext_16i16_to_8i32_offset8: 2007; AVX1: # BB#0: # %entry 2008; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 2009; AVX1-NEXT: vpshufd {{.*#+}} xmm1 = xmm0[2,2,3,3] 2010; AVX1-NEXT: vpxor %xmm2, %xmm2, %xmm2 2011; AVX1-NEXT: vpblendw {{.*#+}} xmm1 = 
xmm1[0],xmm2[1,2,3],xmm1[4],xmm2[5,6,7] 2012; AVX1-NEXT: vpmovzxwd {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2013; AVX1-NEXT: vinsertf128 $1, %xmm1, %ymm0, %ymm0 2014; AVX1-NEXT: retq 2015; 2016; AVX2-LABEL: shuf_zext_16i16_to_8i32_offset8: 2017; AVX2: # BB#0: # %entry 2018; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 2019; AVX2-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2020; AVX2-NEXT: retq 2021; 2022; AVX512-LABEL: shuf_zext_16i16_to_8i32_offset8: 2023; AVX512: # BB#0: # %entry 2024; AVX512-NEXT: vextracti128 $1, %ymm0, %xmm0 2025; AVX512-NEXT: vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero 2026; AVX512-NEXT: retq 2027entry: 2028 %B = shufflevector <16 x i16> %A, <16 x i16> zeroinitializer, <16 x i32> <i32 8, i32 16, i32 9, i32 16, i32 10, i32 16, i32 11, i32 16, i32 12, i32 16, i32 undef, i32 16, i32 14, i32 16, i32 undef, i32 16> 2029 %Z = bitcast <16 x i16> %B to <8 x i32> 2030 ret <8 x i32> %Z 2031} 2032 2033define <2 x i64> @shuf_zext_4i32_to_2i64_offset2(<4 x i32> %A) nounwind uwtable readnone ssp { 2034; SSE-LABEL: shuf_zext_4i32_to_2i64_offset2: 2035; SSE: # BB#0: # %entry 2036; SSE-NEXT: pxor %xmm1, %xmm1 2037; SSE-NEXT: punpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2038; SSE-NEXT: retq 2039; 2040; AVX-LABEL: shuf_zext_4i32_to_2i64_offset2: 2041; AVX: # BB#0: # %entry 2042; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 2043; AVX-NEXT: vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3] 2044; AVX-NEXT: retq 2045entry: 2046 %B = shufflevector <4 x i32> %A, <4 x i32> zeroinitializer, <4 x i32> <i32 2, i32 4, i32 3, i32 4> 2047 %Z = bitcast <4 x i32> %B to <2 x i64> 2048 ret <2 x i64> %Z 2049} 2050 2051define <4 x i64> @shuf_zext_4i32_to_4i64_offset1(<4 x i32> %A) nounwind uwtable readnone ssp { 2052; SSE2-LABEL: shuf_zext_4i32_to_4i64_offset1: 2053; 
SSE2: # BB#0: # %entry 2054; SSE2-NEXT: movdqa %xmm0, %xmm1 2055; SSE2-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0] 2056; SSE2-NEXT: pand %xmm1, %xmm0 2057; SSE2-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2058; SSE2-NEXT: retq 2059; 2060; SSSE3-LABEL: shuf_zext_4i32_to_4i64_offset1: 2061; SSSE3: # BB#0: # %entry 2062; SSSE3-NEXT: movdqa %xmm0, %xmm1 2063; SSSE3-NEXT: movdqa {{.*#+}} xmm0 = [0,0,4294967295,0] 2064; SSSE3-NEXT: pand %xmm1, %xmm0 2065; SSSE3-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2066; SSSE3-NEXT: retq 2067; 2068; SSE41-LABEL: shuf_zext_4i32_to_4i64_offset1: 2069; SSE41: # BB#0: # %entry 2070; SSE41-NEXT: movdqa %xmm0, %xmm1 2071; SSE41-NEXT: pxor %xmm0, %xmm0 2072; SSE41-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0,1,2,3],xmm1[4,5],xmm0[6,7] 2073; SSE41-NEXT: psrldq {{.*#+}} xmm1 = xmm1[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2074; SSE41-NEXT: retq 2075; 2076; AVX1-LABEL: shuf_zext_4i32_to_4i64_offset1: 2077; AVX1: # BB#0: # %entry 2078; AVX1-NEXT: vxorps %xmm1, %xmm1, %xmm1 2079; AVX1-NEXT: vblendps {{.*#+}} xmm1 = xmm1[0,1],xmm0[2],xmm1[3] 2080; AVX1-NEXT: vpsrldq {{.*#+}} xmm0 = xmm0[12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero 2081; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 2082; AVX1-NEXT: retq 2083; 2084; AVX2-LABEL: shuf_zext_4i32_to_4i64_offset1: 2085; AVX2: # BB#0: # %entry 2086; AVX2-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3] 2087; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2088; AVX2-NEXT: retq 2089; 2090; AVX512-LABEL: shuf_zext_4i32_to_4i64_offset1: 2091; AVX512: # BB#0: # %entry 2092; AVX512-NEXT: vpshufd {{.*#+}} xmm0 = xmm0[1,2,3,3] 2093; AVX512-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 2094; AVX512-NEXT: retq 2095entry: 2096 %B = shufflevector <4 x i32> %A, 
<4 x i32> zeroinitializer, <8 x i32> <i32 undef, i32 4, i32 2, i32 4, i32 3, i32 4, i32 undef, i32 4> 2097 %Z = bitcast <8 x i32> %B to <4 x i64> 2098 ret <4 x i64> %Z 2099} 2100
; NOTE(review): The tests above exercise lowering of "shufflevector with a
; zero vector + bitcast" patterns into zero-extension sequences (punpck*,
; pmovzx*, psrldq/pslldq) at various element offsets, across the SSE2/SSSE3/
; SSE4.1/AVX1/AVX2/AVX-512 configurations named in the RUN lines.
; The CHECK assertions here were autogenerated by
; utils/update_llc_test_checks.py (see the NOTE at the top of the file).
; Do not edit the CHECK lines by hand; after any codegen change, regenerate
; them with that script so the prefixes stay in sync with the RUN lines.