; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

;
; PR6455 'Clear Upper Bits' Patterns
;
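; Each element width is tested in three equivalent IR forms that should all
; reduce to masking away the upper half of every element:
;   a) extract each element, truncate, zero-extend and re-insert it
;   b) bitcast to the half-width element type and insert zero into the odd lanes
;   c) a plain 'and' with a constant mask
; If the checks go stale, they can be regenerated with
; utils/update_llc_test_checks.py (run against this file).
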
define <2 x i64> @_clearupper2xi64a(<2 x i64>) nounwind {
; SSE-LABEL: _clearupper2xi64a:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper2xi64a:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper2xi64a:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: retq
  %x0 = extractelement <2 x i64> %0, i32 0
  %x1 = extractelement <2 x i64> %0, i32 1
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %ext1, i32 1
  ret <2 x i64> %v1
}

define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind {
; SSE-LABEL: _clearupper4xi64a:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper4xi64a:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper4xi64a:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
  %x0 = extractelement <4 x i64> %0, i32 0
  %x1 = extractelement <4 x i64> %0, i32 1
  %x2 = extractelement <4 x i64> %0, i32 2
  %x3 = extractelement <4 x i64> %0, i32 3
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  %trunc2 = trunc i64 %x2 to i32
  %trunc3 = trunc i64 %x3 to i32
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  %ext2 = zext i32 %trunc2 to i64
  %ext3 = zext i32 %trunc3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %ext1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %ext2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %ext3, i32 3
  ret <4 x i64> %v3
}

define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32a:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper4xi32a:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT: retq
  %x0 = extractelement <4 x i32> %0, i32 0
  %x1 = extractelement <4 x i32> %0, i32 1
  %x2 = extractelement <4 x i32> %0, i32 2
  %x3 = extractelement <4 x i32> %0, i32 3
  %trunc0 = trunc i32 %x0 to i16
  %trunc1 = trunc i32 %x1 to i16
  %trunc2 = trunc i32 %x2 to i16
  %trunc3 = trunc i32 %x3 to i16
  %ext0 = zext i16 %trunc0 to i32
  %ext1 = zext i16 %trunc1 to i32
  %ext2 = zext i16 %trunc2 to i32
  %ext3 = zext i16 %trunc3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %ext1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %ext2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %ext3, i32 3
  ret <4 x i32> %v3
}

define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind {
; SSE-LABEL: _clearupper8xi32a:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [65535,65535,65535,65535]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper8xi32a:
; AVX1: # BB#0:
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper8xi32a:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
  %x0 = extractelement <8 x i32> %0, i32 0
  %x1 = extractelement <8 x i32> %0, i32 1
  %x2 = extractelement <8 x i32> %0, i32 2
  %x3 = extractelement <8 x i32> %0, i32 3
  %x4 = extractelement <8 x i32> %0, i32 4
  %x5 = extractelement <8 x i32> %0, i32 5
  %x6 = extractelement <8 x i32> %0, i32 6
  %x7 = extractelement <8 x i32> %0, i32 7
  %trunc0 = trunc i32 %x0 to i16
  %trunc1 = trunc i32 %x1 to i16
  %trunc2 = trunc i32 %x2 to i16
  %trunc3 = trunc i32 %x3 to i16
  %trunc4 = trunc i32 %x4 to i16
  %trunc5 = trunc i32 %x5 to i16
  %trunc6 = trunc i32 %x6 to i16
  %trunc7 = trunc i32 %x7 to i16
  %ext0 = zext i16 %trunc0 to i32
  %ext1 = zext i16 %trunc1 to i32
  %ext2 = zext i16 %trunc2 to i32
  %ext3 = zext i16 %trunc3 to i32
  %ext4 = zext i16 %trunc4 to i32
  %ext5 = zext i16 %trunc5 to i32
  %ext6 = zext i16 %trunc6 to i32
  %ext7 = zext i16 %trunc7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %ext0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %ext1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %ext2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %ext3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %ext4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %ext5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %ext6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %ext7, i32 7
  ret <8 x i32> %v7
}
define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16a:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper8xi16a:
; AVX: # BB#0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x0 = extractelement <8 x i16> %0, i32 0
  %x1 = extractelement <8 x i16> %0, i32 1
  %x2 = extractelement <8 x i16> %0, i32 2
  %x3 = extractelement <8 x i16> %0, i32 3
  %x4 = extractelement <8 x i16> %0, i32 4
  %x5 = extractelement <8 x i16> %0, i32 5
  %x6 = extractelement <8 x i16> %0, i32 6
  %x7 = extractelement <8 x i16> %0, i32 7
  %trunc0 = trunc i16 %x0 to i8
  %trunc1 = trunc i16 %x1 to i8
  %trunc2 = trunc i16 %x2 to i8
  %trunc3 = trunc i16 %x3 to i8
  %trunc4 = trunc i16 %x4 to i8
  %trunc5 = trunc i16 %x5 to i8
  %trunc6 = trunc i16 %x6 to i8
  %trunc7 = trunc i16 %x7 to i8
  %ext0 = zext i8 %trunc0 to i16
  %ext1 = zext i8 %trunc1 to i16
  %ext2 = zext i8 %trunc2 to i16
  %ext3 = zext i8 %trunc3 to i16
  %ext4 = zext i8 %trunc4 to i16
  %ext5 = zext i8 %trunc5 to i16
  %ext6 = zext i8 %trunc6 to i16
  %ext7 = zext i8 %trunc7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %ext0, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %ext1, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %ext2, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %ext3, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %ext4, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %ext5, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %ext6, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %ext7, i32 7
  ret <8 x i16> %v7
}

define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16a:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi16a:
; AVX: # BB#0:
; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
  %x0 = extractelement <16 x i16> %0, i32 0
  %x1 = extractelement <16 x i16> %0, i32 1
  %x2 = extractelement <16 x i16> %0, i32 2
  %x3 = extractelement <16 x i16> %0, i32 3
  %x4 = extractelement <16 x i16> %0, i32 4
  %x5 = extractelement <16 x i16> %0, i32 5
  %x6 = extractelement <16 x i16> %0, i32 6
  %x7 = extractelement <16 x i16> %0, i32 7
  %x8 = extractelement <16 x i16> %0, i32 8
  %x9 = extractelement <16 x i16> %0, i32 9
  %x10 = extractelement <16 x i16> %0, i32 10
  %x11 = extractelement <16 x i16> %0, i32 11
  %x12 = extractelement <16 x i16> %0, i32 12
  %x13 = extractelement <16 x i16> %0, i32 13
  %x14 = extractelement <16 x i16> %0, i32 14
  %x15 = extractelement <16 x i16> %0, i32 15
  %trunc0 = trunc i16 %x0 to i8
  %trunc1 = trunc i16 %x1 to i8
  %trunc2 = trunc i16 %x2 to i8
  %trunc3 = trunc i16 %x3 to i8
  %trunc4 = trunc i16 %x4 to i8
  %trunc5 = trunc i16 %x5 to i8
  %trunc6 = trunc i16 %x6 to i8
  %trunc7 = trunc i16 %x7 to i8
  %trunc8 = trunc i16 %x8 to i8
  %trunc9 = trunc i16 %x9 to i8
  %trunc10 = trunc i16 %x10 to i8
  %trunc11 = trunc i16 %x11 to i8
  %trunc12 = trunc i16 %x12 to i8
  %trunc13 = trunc i16 %x13 to i8
  %trunc14 = trunc i16 %x14 to i8
  %trunc15 = trunc i16 %x15 to i8
  %ext0 = zext i8 %trunc0 to i16
  %ext1 = zext i8 %trunc1 to i16
  %ext2 = zext i8 %trunc2 to i16
  %ext3 = zext i8 %trunc3 to i16
  %ext4 = zext i8 %trunc4 to i16
  %ext5 = zext i8 %trunc5 to i16
  %ext6 = zext i8 %trunc6 to i16
  %ext7 = zext i8 %trunc7 to i16
  %ext8 = zext i8 %trunc8 to i16
  %ext9 = zext i8 %trunc9 to i16
  %ext10 = zext i8 %trunc10 to i16
  %ext11 = zext i8 %trunc11 to i16
  %ext12 = zext i8 %trunc12 to i16
  %ext13 = zext i8 %trunc13 to i16
  %ext14 = zext i8 %trunc14 to i16
  %ext15 = zext i8 %trunc15 to i16
  %v0 = insertelement <16 x i16> undef, i16 %ext0, i32 0
  %v1 = insertelement <16 x i16> %v0, i16 %ext1, i32 1
  %v2 = insertelement <16 x i16> %v1, i16 %ext2, i32 2
  %v3 = insertelement <16 x i16> %v2, i16 %ext3, i32 3
  %v4 = insertelement <16 x i16> %v3, i16 %ext4, i32 4
  %v5 = insertelement <16 x i16> %v4, i16 %ext5, i32 5
  %v6 = insertelement <16 x i16> %v5, i16 %ext6, i32 6
  %v7 = insertelement <16 x i16> %v6, i16 %ext7, i32 7
  %v8 = insertelement <16 x i16> %v7, i16 %ext8, i32 8
  %v9 = insertelement <16 x i16> %v8, i16 %ext9, i32 9
  %v10 = insertelement <16 x i16> %v9, i16 %ext10, i32 10
  %v11 = insertelement <16 x i16> %v10, i16 %ext11, i32 11
  %v12 = insertelement <16 x i16> %v11, i16 %ext12, i32 12
  %v13 = insertelement <16 x i16> %v12, i16 %ext13, i32 13
  %v14 = insertelement <16 x i16> %v13, i16 %ext14, i32 14
  %v15 = insertelement <16 x i16> %v14, i16 %ext15, i32 15
  ret <16 x i16> %v15
}

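; The i8 cases below clear the upper nibble of each byte (i8 -> i4 -> i8).
; There is no legal i4 type, so SSE currently bounces every byte through the
; stack and rebuilds the vector with punpck*, while AVX folds the whole
; pattern into a single vandps of a nibble mask.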
define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8a:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi8a:
; AVX: # BB#0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x0 = extractelement <16 x i8> %0, i32 0
  %x1 = extractelement <16 x i8> %0, i32 1
  %x2 = extractelement <16 x i8> %0, i32 2
  %x3 = extractelement <16 x i8> %0, i32 3
  %x4 = extractelement <16 x i8> %0, i32 4
  %x5 = extractelement <16 x i8> %0, i32 5
  %x6 = extractelement <16 x i8> %0, i32 6
  %x7 = extractelement <16 x i8> %0, i32 7
  %x8 = extractelement <16 x i8> %0, i32 8
  %x9 = extractelement <16 x i8> %0, i32 9
  %x10 = extractelement <16 x i8> %0, i32 10
  %x11 = extractelement <16 x i8> %0, i32 11
  %x12 = extractelement <16 x i8> %0, i32 12
  %x13 = extractelement <16 x i8> %0, i32 13
  %x14 = extractelement <16 x i8> %0, i32 14
  %x15 = extractelement <16 x i8> %0, i32 15
  %trunc0 = trunc i8 %x0 to i4
  %trunc1 = trunc i8 %x1 to i4
  %trunc2 = trunc i8 %x2 to i4
  %trunc3 = trunc i8 %x3 to i4
  %trunc4 = trunc i8 %x4 to i4
  %trunc5 = trunc i8 %x5 to i4
  %trunc6 = trunc i8 %x6 to i4
  %trunc7 = trunc i8 %x7 to i4
  %trunc8 = trunc i8 %x8 to i4
  %trunc9 = trunc i8 %x9 to i4
  %trunc10 = trunc i8 %x10 to i4
  %trunc11 = trunc i8 %x11 to i4
  %trunc12 = trunc i8 %x12 to i4
  %trunc13 = trunc i8 %x13 to i4
  %trunc14 = trunc i8 %x14 to i4
  %trunc15 = trunc i8 %x15 to i4
  %ext0 = zext i4 %trunc0 to i8
  %ext1 = zext i4 %trunc1 to i8
  %ext2 = zext i4 %trunc2 to i8
  %ext3 = zext i4 %trunc3 to i8
  %ext4 = zext i4 %trunc4 to i8
  %ext5 = zext i4 %trunc5 to i8
  %ext6 = zext i4 %trunc6 to i8
  %ext7 = zext i4 %trunc7 to i8
  %ext8 = zext i4 %trunc8 to i8
  %ext9 = zext i4 %trunc9 to i8
  %ext10 = zext i4 %trunc10 to i8
  %ext11 = zext i4 %trunc11 to i8
  %ext12 = zext i4 %trunc12 to i8
  %ext13 = zext i4 %trunc13 to i8
  %ext14 = zext i4 %trunc14 to i8
  %ext15 = zext i4 %trunc15 to i8
  %v0 = insertelement <16 x i8> undef, i8 %ext0, i32 0
  %v1 = insertelement <16 x i8> %v0, i8 %ext1, i32 1
  %v2 = insertelement <16 x i8> %v1, i8 %ext2, i32 2
  %v3 = insertelement <16 x i8> %v2, i8 %ext3, i32 3
  %v4 = insertelement <16 x i8> %v3, i8 %ext4, i32 4
  %v5 = insertelement <16 x i8> %v4, i8 %ext5, i32 5
  %v6 = insertelement <16 x i8> %v5, i8 %ext6, i32 6
  %v7 = insertelement <16 x i8> %v6, i8 %ext7, i32 7
  %v8 = insertelement <16 x i8> %v7, i8 %ext8, i32 8
  %v9 = insertelement <16 x i8> %v8, i8 %ext9, i32 9
  %v10 = insertelement <16 x i8> %v9, i8 %ext10, i32 10
  %v11 = insertelement <16 x i8> %v10, i8 %ext11, i32 11
  %v12 = insertelement <16 x i8> %v11, i8 %ext12, i32 12
  %v13 = insertelement <16 x i8> %v12, i8 %ext13, i32 13
  %v14 = insertelement <16 x i8> %v13, i8 %ext14, i32 14
  %v15 = insertelement <16 x i8> %v14, i8 %ext15, i32 15
  ret <16 x i8> %v15
}
define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8a:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm5
; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3]
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm6
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper32xi8a:
; AVX: # BB#0:
; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
  %x0 = extractelement <32 x i8> %0, i32 0
  %x1 = extractelement <32 x i8> %0, i32 1
  %x2 = extractelement <32 x i8> %0, i32 2
  %x3 = extractelement <32 x i8> %0, i32 3
  %x4 = extractelement <32 x i8> %0, i32 4
  %x5 = extractelement <32 x i8> %0, i32 5
  %x6 = extractelement <32 x i8> %0, i32 6
  %x7 = extractelement <32 x i8> %0, i32 7
  %x8 = extractelement <32 x i8> %0, i32 8
  %x9 = extractelement <32 x i8> %0, i32 9
  %x10 = extractelement <32 x i8> %0, i32 10
  %x11 = extractelement <32 x i8> %0, i32 11
  %x12 = extractelement <32 x i8> %0, i32 12
  %x13 = extractelement <32 x i8> %0, i32 13
  %x14 = extractelement <32 x i8> %0, i32 14
  %x15 = extractelement <32 x i8> %0, i32 15
  %x16 = extractelement <32 x i8> %0, i32 16
  %x17 = extractelement <32 x i8> %0, i32 17
  %x18 = extractelement <32 x i8> %0, i32 18
  %x19 = extractelement <32 x i8> %0, i32 19
  %x20 = extractelement <32 x i8> %0, i32 20
  %x21 = extractelement <32 x i8> %0, i32 21
  %x22 = extractelement <32 x i8> %0, i32 22
  %x23 = extractelement <32 x i8> %0, i32 23
  %x24 = extractelement <32 x i8> %0, i32 24
  %x25 = extractelement <32 x i8> %0, i32 25
  %x26 = extractelement <32 x i8> %0, i32 26
  %x27 = extractelement <32 x i8> %0, i32 27
  %x28 = extractelement <32 x i8> %0, i32 28
  %x29 = extractelement <32 x i8> %0, i32 29
  %x30 = extractelement <32 x i8> %0, i32 30
  %x31 = extractelement <32 x i8> %0, i32 31
  %trunc0 = trunc i8 %x0 to i4
  %trunc1 = trunc i8 %x1 to i4
  %trunc2 = trunc i8 %x2 to i4
  %trunc3 = trunc i8 %x3 to i4
  %trunc4 = trunc i8 %x4 to i4
  %trunc5 = trunc i8 %x5 to i4
  %trunc6 = trunc i8 %x6 to i4
  %trunc7 = trunc i8 %x7 to i4
  %trunc8 = trunc i8 %x8 to i4
  %trunc9 = trunc i8 %x9 to i4
  %trunc10 = trunc i8 %x10 to i4
  %trunc11 = trunc i8 %x11 to i4
  %trunc12 = trunc i8 %x12 to i4
  %trunc13 = trunc i8 %x13 to i4
  %trunc14 = trunc i8 %x14 to i4
  %trunc15 = trunc i8 %x15 to i4
  %trunc16 = trunc i8 %x16 to i4
  %trunc17 = trunc i8 %x17 to i4
  %trunc18 = trunc i8 %x18 to i4
  %trunc19 = trunc i8 %x19 to i4
  %trunc20 = trunc i8 %x20 to i4
  %trunc21 = trunc i8 %x21 to i4
  %trunc22 = trunc i8 %x22 to i4
  %trunc23 = trunc i8 %x23 to i4
  %trunc24 = trunc i8 %x24 to i4
  %trunc25 = trunc i8 %x25 to i4
  %trunc26 = trunc i8 %x26 to i4
  %trunc27 = trunc i8 %x27 to i4
  %trunc28 = trunc i8 %x28 to i4
  %trunc29 = trunc i8 %x29 to i4
  %trunc30 = trunc i8 %x30 to i4
  %trunc31 = trunc i8 %x31 to i4
  %ext0 = zext i4 %trunc0 to i8
  %ext1 = zext i4 %trunc1 to i8
  %ext2 = zext i4 %trunc2 to i8
  %ext3 = zext i4 %trunc3 to i8
  %ext4 = zext i4 %trunc4 to i8
  %ext5 = zext i4 %trunc5 to i8
  %ext6 = zext i4 %trunc6 to i8
  %ext7 = zext i4 %trunc7 to i8
  %ext8 = zext i4 %trunc8 to i8
  %ext9 = zext i4 %trunc9 to i8
  %ext10 = zext i4 %trunc10 to i8
  %ext11 = zext i4 %trunc11 to i8
  %ext12 = zext i4 %trunc12 to i8
  %ext13 = zext i4 %trunc13 to i8
  %ext14 = zext i4 %trunc14 to i8
  %ext15 = zext i4 %trunc15 to i8
  %ext16 = zext i4 %trunc16 to i8
  %ext17 = zext i4 %trunc17 to i8
  %ext18 = zext i4 %trunc18 to i8
  %ext19 = zext i4 %trunc19 to i8
  %ext20 = zext i4 %trunc20 to i8
  %ext21 = zext i4 %trunc21 to i8
  %ext22 = zext i4 %trunc22 to i8
  %ext23 = zext i4 %trunc23 to i8
  %ext24 = zext i4 %trunc24 to i8
  %ext25 = zext i4 %trunc25 to i8
  %ext26 = zext i4 %trunc26 to i8
  %ext27 = zext i4 %trunc27 to i8
  %ext28 = zext i4 %trunc28 to i8
  %ext29 = zext i4 %trunc29 to i8
  %ext30 = zext i4 %trunc30 to i8
  %ext31 = zext i4 %trunc31 to i8
  %v0 = insertelement <32 x i8> undef, i8 %ext0, i32 0
  %v1 = insertelement <32 x i8> %v0, i8 %ext1, i32 1
  %v2 = insertelement <32 x i8> %v1, i8 %ext2, i32 2
  %v3 = insertelement <32 x i8> %v2, i8 %ext3, i32 3
  %v4 = insertelement <32 x i8> %v3, i8 %ext4, i32 4
  %v5 = insertelement <32 x i8> %v4, i8 %ext5, i32 5
  %v6 = insertelement <32 x i8> %v5, i8 %ext6, i32 6
  %v7 = insertelement <32 x i8> %v6, i8 %ext7, i32 7
  %v8 = insertelement <32 x i8> %v7, i8 %ext8, i32 8
  %v9 = insertelement <32 x i8> %v8, i8 %ext9, i32 9
  %v10 = insertelement <32 x i8> %v9, i8 %ext10, i32 10
  %v11 = insertelement <32 x i8> %v10, i8 %ext11, i32 11
  %v12 = insertelement <32 x i8> %v11, i8 %ext12, i32 12
  %v13 = insertelement <32 x i8> %v12, i8 %ext13, i32 13
  %v14 = insertelement <32 x i8> %v13, i8 %ext14, i32 14
  %v15 = insertelement <32 x i8> %v14, i8 %ext15, i32 15
  %v16 = insertelement <32 x i8> %v15, i8 %ext16, i32 16
  %v17 = insertelement <32 x i8> %v16, i8 %ext17, i32 17
  %v18 = insertelement <32 x i8> %v17, i8 %ext18, i32 18
  %v19 = insertelement <32 x i8> %v18, i8 %ext19, i32 19
  %v20 = insertelement <32 x i8> %v19, i8 %ext20, i32 20
  %v21 = insertelement <32 x i8> %v20, i8 %ext21, i32 21
  %v22 = insertelement <32 x i8> %v21, i8 %ext22, i32 22
  %v23 = insertelement <32 x i8> %v22, i8 %ext23, i32 23
  %v24 = insertelement <32 x i8> %v23, i8 %ext24, i32 24
  %v25 = insertelement <32 x i8> %v24, i8 %ext25, i32 25
  %v26 = insertelement <32 x i8> %v25, i8 %ext26, i32 26
  %v27 = insertelement <32 x i8> %v26, i8 %ext27, i32 27
  %v28 = insertelement <32 x i8> %v27, i8 %ext28, i32 28
  %v29 = insertelement <32 x i8> %v28, i8 %ext29, i32 29
  %v30 = insertelement <32 x i8> %v29, i8 %ext30, i32 30
  %v31 = insertelement <32 x i8> %v30, i8 %ext31, i32 31
  ret <32 x i8> %v31
}

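; The 'b' variants below express the same masks by bitcasting to the
; half-width element type and inserting zero into each odd lane.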
define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind {
; SSE-LABEL: _clearupper2xi64b:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper2xi64b:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper2xi64b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: retq
  %x32 = bitcast <2 x i64> %0 to <4 x i32>
  %r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <4 x i32> %r0, i32 zeroinitializer, i32 3
  %r = bitcast <4 x i32> %r1 to <2 x i64>
  ret <2 x i64> %r
}

define <4 x i64> @_clearupper4xi64b(<4 x i64>) nounwind {
; SSE-LABEL: _clearupper4xi64b:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper4xi64b:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper4xi64b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
  %x32 = bitcast <4 x i64> %0 to <8 x i32>
  %r0 = insertelement <8 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <8 x i32> %r0, i32 zeroinitializer, i32 3
  %r2 = insertelement <8 x i32> %r1, i32 zeroinitializer, i32 5
  %r3 = insertelement <8 x i32> %r2, i32 zeroinitializer, i32 7
  %r = bitcast <8 x i32> %r3 to <4 x i64>
  ret <4 x i64> %r
}

define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32b:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper4xi32b:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT: retq
  %x16 = bitcast <4 x i32> %0 to <8 x i16>
  %r0 = insertelement <8 x i16> %x16, i16 zeroinitializer, i32 1
  %r1 = insertelement <8 x i16> %r0, i16 zeroinitializer, i32 3
  %r2 = insertelement <8 x i16> %r1, i16 zeroinitializer, i32 5
  %r3 = insertelement <8 x i16> %r2, i16 zeroinitializer, i32 7
  %r = bitcast <8 x i16> %r3 to <4 x i32>
  ret <4 x i32> %r
}

define <8 x i32> @_clearupper8xi32b(<8 x i32>) nounwind {
; SSE-LABEL: _clearupper8xi32b:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper8xi32b:
; AVX1: # BB#0:
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper8xi32b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
  %x16 = bitcast <8 x i32> %0 to <16 x i16>
  %r0 = insertelement <16 x i16> %x16, i16 zeroinitializer, i32 1
  %r1 = insertelement <16 x i16> %r0, i16 zeroinitializer, i32 3
  %r2 = insertelement <16 x i16> %r1, i16 zeroinitializer, i32 5
  %r3 = insertelement <16 x i16> %r2, i16 zeroinitializer, i32 7
  %r4 = insertelement <16 x i16> %r3, i16 zeroinitializer, i32 9
  %r5 = insertelement <16 x i16> %r4, i16 zeroinitializer, i32 11
  %r6 = insertelement <16 x i16> %r5, i16 zeroinitializer, i32 13
  %r7 = insertelement <16 x i16> %r6, i16 zeroinitializer, i32 15
  %r = bitcast <16 x i16> %r7 to <8 x i32>
  ret <8 x i32> %r
}
define <8 x i16> @_clearupper8xi16b(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16b:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper8xi16b:
; AVX: # BB#0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x8 = bitcast <8 x i16> %0 to <16 x i8>
  %r0 = insertelement <16 x i8> %x8, i8 zeroinitializer, i32 1
  %r1 = insertelement <16 x i8> %r0, i8 zeroinitializer, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 zeroinitializer, i32 5
  %r3 = insertelement <16 x i8> %r2, i8 zeroinitializer, i32 7
  %r4 = insertelement <16 x i8> %r3, i8 zeroinitializer, i32 9
  %r5 = insertelement <16 x i8> %r4, i8 zeroinitializer, i32 11
  %r6 = insertelement <16 x i8> %r5, i8 zeroinitializer, i32 13
  %r7 = insertelement <16 x i8> %r6, i8 zeroinitializer, i32 15
  %r = bitcast <16 x i8> %r7 to <8 x i16>
  ret <8 x i16> %r
}

define <16 x i16> @_clearupper16xi16b(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16b:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper16xi16b:
; AVX1: # BB#0:
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper16xi16b:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
; AVX2-NEXT: retq
  %x8 = bitcast <16 x i16> %0 to <32 x i8>
  %r0 = insertelement <32 x i8> %x8, i8 zeroinitializer, i32 1
  %r1 = insertelement <32 x i8> %r0, i8 zeroinitializer, i32 3
  %r2 = insertelement <32 x i8> %r1, i8 zeroinitializer, i32 5
  %r3 = insertelement <32 x i8> %r2, i8 zeroinitializer, i32 7
  %r4 = insertelement <32 x i8> %r3, i8 zeroinitializer, i32 9
  %r5 = insertelement <32 x i8> %r4, i8 zeroinitializer, i32 11
  %r6 = insertelement <32 x i8> %r5, i8 zeroinitializer, i32 13
  %r7 = insertelement <32 x i8> %r6, i8 zeroinitializer, i32 15
  %r8 = insertelement <32 x i8> %r7, i8 zeroinitializer, i32 17
  %r9 = insertelement <32 x i8> %r8, i8 zeroinitializer, i32 19
  %r10 = insertelement <32 x i8> %r9, i8 zeroinitializer, i32 21
  %r11 = insertelement <32 x i8> %r10, i8 zeroinitializer, i32 23
  %r12 = insertelement <32 x i8> %r11, i8 zeroinitializer, i32 25
  %r13 = insertelement <32 x i8> %r12, i8 zeroinitializer, i32 27
  %r14 = insertelement <32 x i8> %r13, i8 zeroinitializer, i32 29
  %r15 = insertelement <32 x i8> %r14, i8 zeroinitializer, i32 31
  %r = bitcast <32 x i8> %r15 to <16 x i16>
  ret <16 x i16> %r
}

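; For i8 elements the 'b' variant has to go through <32 x i4> / <64 x i4>
; vectors. i4 insertion is not legal either, so the bytes are currently moved
; to GPRs and masked one at a time with 'andb $15'.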
define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8b:
; SSE: # BB#0:
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: movq %xmm0, %rcx
; SSE-NEXT: movq %rcx, %r8
; SSE-NEXT: movq %rcx, %r9
; SSE-NEXT: movq %rcx, %r10
; SSE-NEXT: movq %rcx, %rax
; SSE-NEXT: movq %rcx, %rdx
; SSE-NEXT: movq %rcx, %rsi
; SSE-NEXT: movq %rcx, %rdi
; SSE-NEXT: andb $15, %cl
; SSE-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %xmm1, %rcx
; SSE-NEXT: shrq $56, %rdi
; SSE-NEXT: andb $15, %dil
; SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %r11
; SSE-NEXT: shrq $48, %rsi
; SSE-NEXT: andb $15, %sil
; SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %r14
; SSE-NEXT: shrq $40, %rdx
; SSE-NEXT: andb $15, %dl
; SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %rdx
; SSE-NEXT: shrq $32, %rax
; SSE-NEXT: andb $15, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %rax
; SSE-NEXT: shrq $24, %r10
; SSE-NEXT: andb $15, %r10b
; SSE-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %rdi
; SSE-NEXT: shrq $16, %r9
; SSE-NEXT: andb $15, %r9b
; SSE-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %rsi
; SSE-NEXT: shrq $8, %r8
; SSE-NEXT: andb $15, %r8b
; SSE-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %rbx
; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: andb $15, %cl
; SSE-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $56, %rbx
; SSE-NEXT: andb $15, %bl
; SSE-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $48, %rsi
; SSE-NEXT: andb $15, %sil
; SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $40, %rdi
; SSE-NEXT: andb $15, %dil
; SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $32, %rax
; SSE-NEXT: andb $15, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $24, %rdx
; SSE-NEXT: andb $15, %dl
; SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $16, %r14
; SSE-NEXT: andb $15, %r14b
; SSE-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $8, %r11
; SSE-NEXT: andb $15, %r11b
; SSE-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi8b:
; AVX: # BB#0:
; AVX-NEXT: pushq %rbp
; AVX-NEXT: pushq %r15
; AVX-NEXT: pushq %r14
; AVX-NEXT: pushq %r13
; AVX-NEXT: pushq %r12
; AVX-NEXT: pushq %rbx
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rdx
; AVX-NEXT: movq %rcx, %r8
; AVX-NEXT: movq %rcx, %r9
; AVX-NEXT: movq %rcx, %r10
; AVX-NEXT: movq %rcx, %r11
; AVX-NEXT: movq %rcx, %r14
; AVX-NEXT: movq %rcx, %r15
; AVX-NEXT: movq %rdx, %r12
; AVX-NEXT: movq %rdx, %r13
; AVX-NEXT: movq %rdx, %rdi
; AVX-NEXT: movq %rdx, %rax
; AVX-NEXT: movq %rdx, %rsi
; AVX-NEXT: movq %rdx, %rbx
; AVX-NEXT: movq %rdx, %rbp
; AVX-NEXT: andb $15, %dl
; AVX-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX-NEXT: movq %rcx, %rdx
; AVX-NEXT: andb $15, %cl
; AVX-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $56, %rbp
; AVX-NEXT: andb $15, %bpl
; AVX-NEXT: movb %bpl, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $48, %rbx
; AVX-NEXT: andb $15, %bl
; AVX-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $40, %rsi
; AVX-NEXT: andb $15, %sil
; AVX-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $32, %rax
; AVX-NEXT: andb $15, %al
; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $24, %rdi
; AVX-NEXT: andb $15, %dil
; AVX-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $16, %r13
; AVX-NEXT: andb $15, %r13b
; AVX-NEXT: movb %r13b, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $8, %r12
; AVX-NEXT: andb $15, %r12b
; AVX-NEXT: movb %r12b, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $56, %rdx
; AVX-NEXT: andb $15, %dl
; AVX-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $48, %r15
; AVX-NEXT: andb $15, %r15b
; AVX-NEXT: movb %r15b, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $40, %r14
; AVX-NEXT: andb $15, %r14b
; AVX-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $32, %r11
; AVX-NEXT: andb $15, %r11b
; AVX-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $24, %r10
; AVX-NEXT: andb $15, %r10b
; AVX-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $16, %r9
; AVX-NEXT: andb $15, %r9b
; AVX-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shrq $8, %r8
; AVX-NEXT: andb $15, %r8b
; AVX-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
; AVX-NEXT: movb $0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT: popq %rbx
; AVX-NEXT: popq %r12
; AVX-NEXT: popq %r13
; AVX-NEXT: popq %r14
; AVX-NEXT: popq %r15
; AVX-NEXT: popq %rbp
; AVX-NEXT: retq
  %x4 = bitcast <16 x i8> %0 to <32 x i4>
  %r0 = insertelement <32 x i4> %x4, i4 zeroinitializer, i32 1
  %r1 = insertelement <32 x i4> %r0, i4 zeroinitializer, i32 3
  %r2 = insertelement <32 x i4> %r1, i4 zeroinitializer, i32 5
  %r3 = insertelement <32 x i4> %r2, i4 zeroinitializer, i32 7
  %r4 = insertelement <32 x i4> %r3, i4 zeroinitializer, i32 9
  %r5 = insertelement <32 x i4> %r4, i4 zeroinitializer, i32 11
  %r6 = insertelement <32 x i4> %r5, i4 zeroinitializer, i32 13
  %r7 = insertelement <32 x i4> %r6, i4 zeroinitializer, i32 15
  %r8 = insertelement <32 x i4> %r7, i4 zeroinitializer, i32 17
  %r9 = insertelement <32 x i4> %r8, i4 zeroinitializer, i32 19
  %r10 = insertelement <32 x i4> %r9, i4 zeroinitializer, i32 21
  %r11 = insertelement <32 x i4> %r10, i4 zeroinitializer, i32 23
  %r12 = insertelement <32 x i4> %r11, i4 zeroinitializer, i32 25
  %r13 = insertelement <32 x i4> %r12, i4 zeroinitializer, i32 27
  %r14 = insertelement <32 x i4> %r13, i4 zeroinitializer, i32 29
  %r15 = insertelement <32 x i4> %r14, i4 zeroinitializer, i32 31
  %r = bitcast <32 x i4> %r15 to <16 x i8>
  ret <16 x i8> %r
}

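; Note: _clearupper32xi8b builds %r from %r15, so %r16-%r31 are dead and only
; the low 16 bytes are actually masked - the checks below reflect that.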
define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8b:
; SSE: # BB#0:
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE-NEXT: movq %xmm0, %rcx
; SSE-NEXT: movq %rcx, %r8
; SSE-NEXT: movq %rcx, %r9
; SSE-NEXT: movq %rcx, %r10
; SSE-NEXT: movq %rcx, %rax
; SSE-NEXT: movq %rcx, %rdx
; SSE-NEXT: movq %rcx, %rsi
; SSE-NEXT: movq %rcx, %rdi
; SSE-NEXT: andb $15, %cl
; SSE-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %xmm2, %rcx
; SSE-NEXT: shrq $56, %rdi
; SSE-NEXT: andb $15, %dil
; SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %r11
; SSE-NEXT: shrq $48, %rsi
; SSE-NEXT: andb $15, %sil
; SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %r14
; SSE-NEXT: shrq $40, %rdx
; SSE-NEXT: andb $15, %dl
; SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %rdx
; SSE-NEXT: shrq $32, %rax
; SSE-NEXT: andb $15, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %rax
; SSE-NEXT: shrq $24, %r10
; SSE-NEXT: andb $15, %r10b
; SSE-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %rdi
; SSE-NEXT: shrq $16, %r9
; SSE-NEXT: andb $15, %r9b
; SSE-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %rsi
; SSE-NEXT: shrq $8, %r8
; SSE-NEXT: andb $15, %r8b
; SSE-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %rbx
; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: andb $15, %cl
; SSE-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $56, %rbx
; SSE-NEXT: andb $15, %bl
; SSE-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $48, %rsi
; SSE-NEXT: andb $15, %sil
; SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $40, %rdi
; SSE-NEXT: andb $15, %dil
; SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $32, %rax
; SSE-NEXT: andb $15, %al
; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $24, %rdx
; SSE-NEXT: andb $15, %dl
; SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $16, %r14
; SSE-NEXT: andb $15, %r14b
; SSE-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
; SSE-NEXT: shrq $8, %r11
; SSE-NEXT: andb $15, %r11b
; SSE-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero
; SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE-NEXT: popq %rbx
; SSE-NEXT: popq %r14
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper32xi8b:
; AVX1: # BB#0:
; AVX1-NEXT: pushq %r14
; AVX1-NEXT: pushq %rbx
; AVX1-NEXT: vpextrq $1, %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %r14
; AVX1-NEXT: vpextrq $1, %xmm0, %rdx
; AVX1-NEXT: movq %rdx, %r8
; AVX1-NEXT: movq %rdx, %r9
; AVX1-NEXT: movq %rdx, %r11
; AVX1-NEXT: movq %rdx, %rsi
; AVX1-NEXT: movq %rdx, %rdi
; AVX1-NEXT: movq %rdx, %rcx
; AVX1-NEXT: movq %rdx, %rax
; AVX1-NEXT: andb $15, %dl
; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $56, %rax
; AVX1-NEXT: andb $15, %al
; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %r10
; AVX1-NEXT: shrq $48, %rcx
; AVX1-NEXT: andb $15, %cl
; AVX1-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rdx
; AVX1-NEXT: shrq $40, %rdi
; AVX1-NEXT: andb $15, %dil
; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rax
; AVX1-NEXT: shrq $32, %rsi
; AVX1-NEXT: andb $15, %sil
; AVX1-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rcx
; AVX1-NEXT: shrq $24, %r11
; AVX1-NEXT: andb $15, %r11b
; AVX1-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rsi
; AVX1-NEXT: shrq $16, %r9
; AVX1-NEXT: andb $15, %r9b
; AVX1-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rdi
; AVX1-NEXT: shrq $8, %r8
; AVX1-NEXT: andb $15, %r8b
; AVX1-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %r14, %rbx
; AVX1-NEXT: andb $15, %r14b
; AVX1-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: shrq $8, %r10
; AVX1-NEXT: shrq $16, %rdx
; AVX1-NEXT: shrq $24, %rax
; AVX1-NEXT: shrq $32, %rcx
; AVX1-NEXT: shrq $40, %rsi
; AVX1-NEXT: shrq $48, %rdi
; AVX1-NEXT: shrq $56, %rbx
; AVX1-NEXT: andb $15, %bl
; AVX1-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %dil
; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %sil
; AVX1-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %cl
; AVX1-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %al
; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %dl
; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: andb $15, %r10b
; AVX1-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: movq %rax, %r8
; AVX1-NEXT: movq %rax, %rdx
; AVX1-NEXT: movq %rax, %rsi
; AVX1-NEXT: movq %rax, %rdi
; AVX1-NEXT: movl %eax, %ebx
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: shrl $8, %eax
; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX1-NEXT: shrl $16, %ecx
; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX1-NEXT: shrl $24, %ebx
; AVX1-NEXT: vpinsrb $3, %ebx, %xmm1, %xmm1
; AVX1-NEXT: shrq $32, %rdi
; AVX1-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
; AVX1-NEXT: shrq $40, %rsi
; AVX1-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1
; AVX1-NEXT: movb $0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm2
; AVX1-NEXT: shrq $48, %rdx
; AVX1-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
; AVX1-NEXT: vpextrq $1, %xmm0, %rax
; AVX1-NEXT: shrq $56, %r8
; AVX1-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $8, %ecx
; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $16, %ecx
; AVX1-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $24, %ecx
; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $32, %rcx
; AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $40, %rcx
; AVX1-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $48, %rcx
; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; AVX1-NEXT: vmovq %xmm2, %rcx
; AVX1-NEXT: shrq $56, %rax
; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $8, %eax
; AVX1-NEXT: vmovd %ecx, %xmm1
; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $16, %eax
; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $24, %eax
; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: shrq $32, %rax
; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: shrq $40, %rax
; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: shrq $48, %rax
; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX1-NEXT: vpextrq $1, %xmm2, %rax
; AVX1-NEXT: shrq $56, %rcx
; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $8, %ecx
; AVX1-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX1-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $16, %ecx
; AVX1-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $24, %ecx
; AVX1-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $32, %rcx
; AVX1-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $40, %rcx
; AVX1-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $48, %rcx
; AVX1-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
; AVX1-NEXT: shrq $56, %rax
; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: popq %rbx
; AVX1-NEXT: popq %r14
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper32xi8b:
; AVX2: # BB#0:
; AVX2-NEXT: pushq %r14
; AVX2-NEXT: pushq %rbx
; AVX2-NEXT: vpextrq $1, %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovq %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %r14
; AVX2-NEXT: vpextrq $1, %xmm0, %rdx
; AVX2-NEXT: movq %rdx, %r8
; AVX2-NEXT: movq %rdx, %r9
; AVX2-NEXT: movq %rdx, %r11
; AVX2-NEXT: movq %rdx, %rsi
; AVX2-NEXT: movq %rdx, %rdi
; AVX2-NEXT: movq %rdx, %rcx
; AVX2-NEXT: movq %rdx, %rax
; AVX2-NEXT: andb $15, %dl
; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $56, %rax
; AVX2-NEXT: andb $15, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %r10
; AVX2-NEXT: shrq $48, %rcx
; AVX2-NEXT: andb $15, %cl
; AVX2-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rdx
; AVX2-NEXT: shrq $40, %rdi
; AVX2-NEXT: andb $15, %dil
; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rax
; AVX2-NEXT: shrq $32, %rsi
; AVX2-NEXT: andb $15, %sil
; AVX2-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rcx
; AVX2-NEXT: shrq $24, %r11
; AVX2-NEXT: andb $15, %r11b
; AVX2-NEXT: movb %r11b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rsi
; AVX2-NEXT: shrq $16, %r9
; AVX2-NEXT: andb $15, %r9b
; AVX2-NEXT: movb %r9b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rdi
; AVX2-NEXT: shrq $8, %r8
; AVX2-NEXT: andb $15, %r8b
; AVX2-NEXT: movb %r8b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %r14, %rbx
; AVX2-NEXT: andb $15, %r14b
; AVX2-NEXT: movb %r14b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: shrq $8, %r10
; AVX2-NEXT: shrq $16, %rdx
; AVX2-NEXT: shrq $24, %rax
; AVX2-NEXT: shrq $32, %rcx
; AVX2-NEXT: shrq $40, %rsi
; AVX2-NEXT: shrq $48, %rdi
; AVX2-NEXT: shrq $56, %rbx
; AVX2-NEXT: andb $15, %bl
; AVX2-NEXT: movb %bl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %dil
; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %sil
; AVX2-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %cl
; AVX2-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %al
; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %dl
; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: andb $15, %r10b
; AVX2-NEXT: movb %r10b, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: movq %rax, %r8
; AVX2-NEXT: movq %rax, %rdx
; AVX2-NEXT: movq %rax, %rsi
; AVX2-NEXT: movq %rax, %rdi
; AVX2-NEXT: movl %eax, %ebx
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: shrl $8, %eax
; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX2-NEXT: shrl $24, %ebx
; AVX2-NEXT: vpinsrb $3, %ebx, %xmm1, %xmm1
; AVX2-NEXT: shrq $32, %rdi
; AVX2-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1
; AVX2-NEXT: shrq $40, %rsi
; AVX2-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1
; AVX2-NEXT: movb $0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm2
; AVX2-NEXT: shrq $48, %rdx
; AVX2-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1
; AVX2-NEXT: vpextrq $1, %xmm0, %rax
; AVX2-NEXT: shrq $56, %r8
; AVX2-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $8, %ecx
; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $24, %ecx
; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $32, %rcx
; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $40, %rcx
; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $48, %rcx
; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0
; AVX2-NEXT: vmovq %xmm2, %rcx
; AVX2-NEXT: shrq $56, %rax
; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $8, %eax
; AVX2-NEXT: vmovd %ecx, %xmm1
; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $24, %eax
; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: shrq $32, %rax
; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: shrq $40, %rax
; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: shrq $48, %rax
; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1
; AVX2-NEXT: vpextrq $1, %xmm2, %rax
; AVX2-NEXT: shrq $56, %rcx
; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $8, %ecx
; AVX2-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1
; AVX2-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $24, %ecx
; AVX2-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $32, %rcx
; AVX2-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $40, %rcx
; AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $48, %rcx
; AVX2-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1
; AVX2-NEXT: shrq $56, %rax
; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: popq %rbx
; AVX2-NEXT: popq %r14
; AVX2-NEXT: retq
  %x4 = bitcast <32 x i8> %0 to <64 x i4>
  %r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1
  %r1 = insertelement <64 x i4> %r0, i4 zeroinitializer, i32 3
  %r2 = insertelement <64 x i4> %r1, i4 zeroinitializer, i32 5
  %r3 = insertelement <64 x i4> %r2, i4 zeroinitializer, i32 7
  %r4 = insertelement <64 x i4> %r3, i4 zeroinitializer, i32 9
  %r5 = insertelement <64 x i4> %r4, i4 zeroinitializer, i32 11
  %r6 = insertelement <64 x i4> %r5, i4 zeroinitializer, i32 13
  %r7 = insertelement <64 x i4> %r6, i4 zeroinitializer, i32 15
  %r8 = insertelement <64 x i4> %r7, i4 zeroinitializer, i32 17
  %r9 = insertelement <64 x i4> %r8, i4 zeroinitializer, i32 19
  %r10 = insertelement <64 x i4> %r9, i4 zeroinitializer, i32 21
  %r11 = insertelement <64 x i4> %r10, i4 zeroinitializer, i32 23
  %r12 = insertelement <64 x i4> %r11, i4 zeroinitializer, i32 25
  %r13 = insertelement <64 x i4> %r12, i4 zeroinitializer, i32 27
  %r14 = insertelement <64 x i4> %r13, i4 zeroinitializer, i32 29
  %r15 = insertelement <64 x i4> %r14, i4 zeroinitializer, i32 31
  %r16 = insertelement <64 x i4> %r15, i4 zeroinitializer, i32 33
  %r17 = insertelement <64 x i4> %r16, i4 zeroinitializer, i32 35
  %r18 = insertelement <64 x i4> %r17, i4 zeroinitializer, i32 37
  %r19 = insertelement <64 x i4> %r18, i4 zeroinitializer, i32 39
  %r20 = insertelement <64 x i4> %r19, i4 zeroinitializer, i32 41
  %r21 = insertelement <64 x i4> %r20, i4 zeroinitializer, i32 43
  %r22 = insertelement <64 x i4> %r21, i4 zeroinitializer, i32 45
  %r23 = insertelement <64 x i4> %r22, i4 zeroinitializer, i32 47
  %r24 = insertelement <64 x i4> %r23, i4 zeroinitializer, i32 49
  %r25 = insertelement <64 x i4> %r24, i4 zeroinitializer, i32 51
  %r26 = insertelement <64 x i4> %r25, i4 zeroinitializer, i32 53
  %r27 = insertelement <64 x i4> %r26, i4 zeroinitializer, i32 55
  %r28 = insertelement <64 x i4> %r27, i4 zeroinitializer, i32 57
  %r29 = insertelement <64 x i4> %r28, i4 zeroinitializer, i32 59
  %r30 = insertelement <64 x i4> %r29, i4 zeroinitializer, i32 61
  %r31 = insertelement <64 x i4> %r30, i4 zeroinitializer, i32 63
  %r = bitcast <64 x i4> %r15 to <32 x i8>
  ret <32 x i8> %r
}

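; The 'c' variants below write the same masks directly as an 'and' with a
; vector constant - the form the 'a' and 'b' patterns should ideally match.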

define <2 x i64> @_clearupper2xi64c(<2 x i64>) nounwind {
; SSE-LABEL: _clearupper2xi64c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper2xi64c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper2xi64c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT:    retq
  %r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0
  ret <2 x i64> %r
}

define <4 x i64> @_clearupper4xi64c(<4 x i64>) nounwind {
; SSE-LABEL: _clearupper4xi64c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper4xi64c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper4xi64c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT:    retq
  %r = and <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>, %0
  ret <4 x i64> %r
}

define <4 x i32> @_clearupper4xi32c(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper4xi32c:
; AVX:       # BB#0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT:    retq
  %r = and <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, %0
  ret <4 x i32> %r
}

define <8 x i32> @_clearupper8xi32c(<8 x i32>) nounwind {
; SSE-LABEL: _clearupper8xi32c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX1-LABEL: _clearupper8xi32c:
; AVX1:       # BB#0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper8xi32c:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    retq
  %r = and <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>, %0
  ret <8 x i32> %r
}

define <8 x i16> @_clearupper8xi16c(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper8xi16c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = and <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
  ret <8 x i16> %r
}
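
; NOTE (editor sketch): the "c" variants in this file write the constant mask
; as the *first* 'and' operand, the non-canonical position; since 'and' is
; commutative, isel must match this commuted form just as it matches the
; constant-on-the-right form. A canonical-form twin for the <2 x i64> case
; (hypothetical, no CHECK lines) should lower to the same andps/blend
; sequence:
define <2 x i64> @_clearupper2xi64c_commuted(<2 x i64> %a) nounwind {
  %r = and <2 x i64> %a, <i64 4294967295, i64 4294967295>
  ret <2 x i64> %r
}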

define <16 x i16> @_clearupper16xi16c(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi16c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %r = and <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
  ret <16 x i16> %r
}

define <16 x i8> @_clearupper16xi8c(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8c:
; SSE:       # BB#0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi8c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %r = and <16 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
  ret <16 x i8> %r
}

define <32 x i8> @_clearupper32xi8c(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8c:
; SSE:       # BB#0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper32xi8c:
; AVX:       # BB#0:
; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %r = and <32 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
  ret <32 x i8> %r
}
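
; NOTE (editor sketch): the same "clear the upper bits" intent can also be
; expressed as a shift pair, since a logical right shift zero-fills the high
; bits. Hypothetical addition for illustration only (not part of the original
; test, so no CHECK lines); shifting left then right by 16 clears the top
; half of each i32 lane:
define <4 x i32> @_clearupper4xi32_shifts_sketch(<4 x i32> %a) nounwind {
  %shl = shl <4 x i32> %a, <i32 16, i32 16, i32 16, i32 16>
  %r = lshr <4 x i32> %shl, <i32 16, i32 16, i32 16, i32 16>
  ret <4 x i32> %r
}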