; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

;
; PR6455 'Clear Upper Bits' Patterns
;

define <2 x i64> @_clearupper2xi64a(<2 x i64>) nounwind {
; SSE2-LABEL: _clearupper2xi64a:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: _clearupper2xi64a:
; SSE42:       # %bb.0:
; SSE42-NEXT:    xorps %xmm1, %xmm1
; SSE42-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE42-NEXT:    retq
;
; AVX-LABEL: _clearupper2xi64a:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
  %x0 = extractelement <2 x i64> %0, i32 0
  %x1 = extractelement <2 x i64> %0, i32 1
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %ext1, i32 1
  ret <2 x i64> %v1
}

define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind {
; SSE2-LABEL: _clearupper4xi64a:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE2-NEXT:    andps %xmm2, %xmm0
; SSE2-NEXT:    andps %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: _clearupper4xi64a:
; SSE42:       # %bb.0:
; SSE42-NEXT:    xorps %xmm2, %xmm2
; SSE42-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE42-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SSE42-NEXT:    retq
;
; AVX-LABEL: _clearupper4xi64a:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX-NEXT:    retq
  %x0 = extractelement <4 x i64> %0, i32 0
  %x1 = extractelement <4 x i64> %0, i32 1
  %x2 = extractelement <4 x i64> %0, i32 2
  %x3 = extractelement <4 x i64> %0, i32 3
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  %trunc2 = trunc i64 %x2 to i32
  %trunc3 = trunc i64 %x3 to i32
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  %ext2 = zext i32 %trunc2 to i64
  %ext3 = zext i32 %trunc3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %ext1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %ext2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %ext3, i32 3
  ret <4 x i64> %v3
}

define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind {
; SSE2-LABEL: _clearupper4xi32a:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: _clearupper4xi32a:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pxor %xmm1, %xmm1
; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; SSE42-NEXT:    retq
;
; AVX-LABEL: _clearupper4xi32a:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT:    retq
  %x0 = extractelement <4 x i32> %0, i32 0
  %x1 = extractelement <4 x i32> %0, i32 1
  %x2 = extractelement <4 x i32> %0, i32 2
  %x3 = extractelement <4 x i32> %0, i32 3
  %trunc0 = trunc i32 %x0 to i16
  %trunc1 = trunc i32 %x1 to i16
  %trunc2 = trunc i32 %x2 to i16
  %trunc3 = trunc i32 %x3 to i16
  %ext0 = zext i16 %trunc0 to i32
  %ext1 = zext i16 %trunc1 to i32
  %ext2 = zext i16 %trunc2 to i32
  %ext3 = zext i16 %trunc3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %ext1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %ext2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %ext3, i32 3
  ret <4 x i32> %v3
}

define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind {
; SSE2-LABEL: _clearupper8xi32a:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,65535,65535,65535]
; SSE2-NEXT:    andps %xmm2, %xmm0
; SSE2-NEXT:    andps %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: _clearupper8xi32a:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pxor %xmm2, %xmm2
; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE42-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE42-NEXT:    retq
;
; AVX1-LABEL: _clearupper8xi32a:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper8xi32a:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    retq
  %x0 = extractelement <8 x i32> %0, i32 0
  %x1 = extractelement <8 x i32> %0, i32 1
  %x2 = extractelement <8 x i32> %0, i32 2
  %x3 = extractelement <8 x i32> %0, i32 3
  %x4 = extractelement <8 x i32> %0, i32 4
  %x5 = extractelement <8 x i32> %0, i32 5
  %x6 = extractelement <8 x i32> %0, i32 6
  %x7 = extractelement <8 x i32> %0, i32 7
  %trunc0 = trunc i32 %x0 to i16
  %trunc1 = trunc i32 %x1 to i16
  %trunc2 = trunc i32 %x2 to i16
  %trunc3 = trunc i32 %x3 to i16
  %trunc4 = trunc i32 %x4 to i16
  %trunc5 = trunc i32 %x5 to i16
  %trunc6 = trunc i32 %x6 to i16
  %trunc7 = trunc i32 %x7 to i16
  %ext0 = zext i16 %trunc0 to i32
  %ext1 = zext i16 %trunc1 to i32
  %ext2 = zext i16 %trunc2 to i32
  %ext3 = zext i16 %trunc3 to i32
  %ext4 = zext i16 %trunc4 to i32
  %ext5 = zext i16 %trunc5 to i32
  %ext6 = zext i16 %trunc6 to i32
  %ext7 = zext i16 %trunc7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %ext0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %ext1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %ext2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %ext3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %ext4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %ext5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %ext6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %ext7, i32 7
  ret <8 x i32> %v7
}

define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16a:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper8xi16a:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x0 = extractelement <8 x i16> %0, i32 0
  %x1 = extractelement <8 x i16> %0, i32 1
  %x2 = extractelement <8 x i16> %0, i32 2
  %x3 = extractelement <8 x i16> %0, i32 3
  %x4 = extractelement <8 x i16> %0, i32 4
  %x5 = extractelement <8 x i16> %0, i32 5
  %x6 = extractelement <8 x i16> %0, i32 6
  %x7 = extractelement <8 x i16> %0, i32 7
  %trunc0 = trunc i16 %x0 to i8
  %trunc1 = trunc i16 %x1 to i8
  %trunc2 = trunc i16 %x2 to i8
  %trunc3 = trunc i16 %x3 to i8
  %trunc4 = trunc i16 %x4 to i8
  %trunc5 = trunc i16 %x5 to i8
  %trunc6 = trunc i16 %x6 to i8
  %trunc7 = trunc i16 %x7 to i8
  %ext0 = zext i8 %trunc0 to i16
  %ext1 = zext i8 %trunc1 to i16
  %ext2 = zext i8 %trunc2 to i16
  %ext3 = zext i8 %trunc3 to i16
  %ext4 = zext i8 %trunc4 to i16
  %ext5 = zext i8 %trunc5 to i16
  %ext6 = zext i8 %trunc6 to i16
  %ext7 = zext i8 %trunc7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %ext0, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %ext1, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %ext2, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %ext3, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %ext4, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %ext5, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %ext6, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %ext7, i32 7
  ret <8 x i16> %v7
}

define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16a:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi16a:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %x0 = extractelement <16 x i16> %0, i32 0
  %x1 = extractelement <16 x i16> %0, i32 1
  %x2 = extractelement <16 x i16> %0, i32 2
  %x3 = extractelement <16 x i16> %0, i32 3
  %x4 = extractelement <16 x i16> %0, i32 4
  %x5 = extractelement <16 x i16> %0, i32 5
  %x6 = extractelement <16 x i16> %0, i32 6
  %x7 = extractelement <16 x i16> %0, i32 7
  %x8 = extractelement <16 x i16> %0, i32 8
  %x9 = extractelement <16 x i16> %0, i32 9
  %x10 = extractelement <16 x i16> %0, i32 10
  %x11 = extractelement <16 x i16> %0, i32 11
  %x12 = extractelement <16 x i16> %0, i32 12
  %x13 = extractelement <16 x i16> %0, i32 13
  %x14 = extractelement <16 x i16> %0, i32 14
  %x15 = extractelement <16 x i16> %0, i32 15
  %trunc0 = trunc i16 %x0 to i8
  %trunc1 = trunc i16 %x1 to i8
  %trunc2 = trunc i16 %x2 to i8
  %trunc3 = trunc i16 %x3 to i8
  %trunc4 = trunc i16 %x4 to i8
  %trunc5 = trunc i16 %x5 to i8
  %trunc6 = trunc i16 %x6 to i8
  %trunc7 = trunc i16 %x7 to i8
  %trunc8 = trunc i16 %x8 to i8
  %trunc9 = trunc i16 %x9 to i8
  %trunc10 = trunc i16 %x10 to i8
  %trunc11 = trunc i16 %x11 to i8
  %trunc12 = trunc i16 %x12 to i8
  %trunc13 = trunc i16 %x13 to i8
  %trunc14 = trunc i16 %x14 to i8
  %trunc15 = trunc i16 %x15 to i8
  %ext0 = zext i8 %trunc0 to i16
  %ext1 = zext i8 %trunc1 to i16
  %ext2 = zext i8 %trunc2 to i16
  %ext3 = zext i8 %trunc3 to i16
  %ext4 = zext i8 %trunc4 to i16
  %ext5 = zext i8 %trunc5 to i16
  %ext6 = zext i8 %trunc6 to i16
  %ext7 = zext i8 %trunc7 to i16
  %ext8 = zext i8 %trunc8 to i16
  %ext9 = zext i8 %trunc9 to i16
  %ext10 = zext i8 %trunc10 to i16
  %ext11 = zext i8 %trunc11 to i16
  %ext12 = zext i8 %trunc12 to i16
  %ext13 = zext i8 %trunc13 to i16
  %ext14 = zext i8 %trunc14 to i16
  %ext15 = zext i8 %trunc15 to i16
  %v0 = insertelement <16 x i16> undef, i16 %ext0, i32 0
  %v1 = insertelement <16 x i16> %v0, i16 %ext1, i32 1
  %v2 = insertelement <16 x i16> %v1, i16 %ext2, i32 2
  %v3 = insertelement <16 x i16> %v2, i16 %ext3, i32 3
  %v4 = insertelement <16 x i16> %v3, i16 %ext4, i32 4
  %v5 = insertelement <16 x i16> %v4, i16 %ext5, i32 5
  %v6 = insertelement <16 x i16> %v5, i16 %ext6, i32 6
  %v7 = insertelement <16 x i16> %v6, i16 %ext7, i32 7
  %v8 = insertelement <16 x i16> %v7, i16 %ext8, i32 8
  %v9 = insertelement <16 x i16> %v8, i16 %ext9, i32 9
  %v10 = insertelement <16 x i16> %v9, i16 %ext10, i32 10
  %v11 = insertelement <16 x i16> %v10, i16 %ext11, i32 11
  %v12 = insertelement <16 x i16> %v11, i16 %ext12, i32 12
  %v13 = insertelement <16 x i16> %v12, i16 %ext13, i32 13
  %v14 = insertelement <16 x i16> %v13, i16 %ext14, i32 14
  %v15 = insertelement <16 x i16> %v14, i16 %ext15, i32 15
  ret <16 x i16> %v15
}

define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8a:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi8a:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x0 = extractelement <16 x i8> %0, i32 0
  %x1 = extractelement <16 x i8> %0, i32 1
  %x2 = extractelement <16 x i8> %0, i32 2
  %x3 = extractelement <16 x i8> %0, i32 3
  %x4 = extractelement <16 x i8> %0, i32 4
  %x5 = extractelement <16 x i8> %0, i32 5
  %x6 = extractelement <16 x i8> %0, i32 6
  %x7 = extractelement <16 x i8> %0, i32 7
  %x8 = extractelement <16 x i8> %0, i32 8
  %x9 = extractelement <16 x i8> %0, i32 9
  %x10 = extractelement <16 x i8> %0, i32 10
  %x11 = extractelement <16 x i8> %0, i32 11
  %x12 = extractelement <16 x i8> %0, i32 12
  %x13 = extractelement <16 x i8> %0, i32 13
  %x14 = extractelement <16 x i8> %0, i32 14
  %x15 = extractelement <16 x i8> %0, i32 15
  %trunc0 = trunc i8 %x0 to i4
  %trunc1 = trunc i8 %x1 to i4
  %trunc2 = trunc i8 %x2 to i4
  %trunc3 = trunc i8 %x3 to i4
  %trunc4 = trunc i8 %x4 to i4
  %trunc5 = trunc i8 %x5 to i4
  %trunc6 = trunc i8 %x6 to i4
  %trunc7 = trunc i8 %x7 to i4
  %trunc8 = trunc i8 %x8 to i4
  %trunc9 = trunc i8 %x9 to i4
  %trunc10 = trunc i8 %x10 to i4
  %trunc11 = trunc i8 %x11 to i4
  %trunc12 = trunc i8 %x12 to i4
  %trunc13 = trunc i8 %x13 to i4
  %trunc14 = trunc i8 %x14 to i4
  %trunc15 = trunc i8 %x15 to i4
  %ext0 = zext i4 %trunc0 to i8
  %ext1 = zext i4 %trunc1 to i8
  %ext2 = zext i4 %trunc2 to i8
  %ext3 = zext i4 %trunc3 to i8
  %ext4 = zext i4 %trunc4 to i8
  %ext5 = zext i4 %trunc5 to i8
  %ext6 = zext i4 %trunc6 to i8
  %ext7 = zext i4 %trunc7 to i8
  %ext8 = zext i4 %trunc8 to i8
  %ext9 = zext i4 %trunc9 to i8
  %ext10 = zext i4 %trunc10 to i8
  %ext11 = zext i4 %trunc11 to i8
  %ext12 = zext i4 %trunc12 to i8
  %ext13 = zext i4 %trunc13 to i8
  %ext14 = zext i4 %trunc14 to i8
  %ext15 = zext i4 %trunc15 to i8
  %v0 = insertelement <16 x i8> undef, i8 %ext0, i32 0
  %v1 = insertelement <16 x i8> %v0, i8 %ext1, i32 1
  %v2 = insertelement <16 x i8> %v1, i8 %ext2, i32 2
  %v3 = insertelement <16 x i8> %v2, i8 %ext3, i32 3
  %v4 = insertelement <16 x i8> %v3, i8 %ext4, i32 4
  %v5 = insertelement <16 x i8> %v4, i8 %ext5, i32 5
  %v6 = insertelement <16 x i8> %v5, i8 %ext6, i32 6
  %v7 = insertelement <16 x i8> %v6, i8 %ext7, i32 7
  %v8 = insertelement <16 x i8> %v7, i8 %ext8, i32 8
  %v9 = insertelement <16 x i8> %v8, i8 %ext9, i32 9
  %v10 = insertelement <16 x i8> %v9, i8 %ext10, i32 10
  %v11 = insertelement <16 x i8> %v10, i8 %ext11, i32 11
  %v12 = insertelement <16 x i8> %v11, i8 %ext12, i32 12
  %v13 = insertelement <16 x i8> %v12, i8 %ext13, i32 13
  %v14 = insertelement <16 x i8> %v13, i8 %ext14, i32 14
  %v15 = insertelement <16 x i8> %v14, i8 %ext15, i32 15
  ret <16 x i8> %v15
}

define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8a:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper32xi8a:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT:    retq
  %x0 = extractelement <32 x i8> %0, i32 0
  %x1 = extractelement <32 x i8> %0, i32 1
  %x2 = extractelement <32 x i8> %0, i32 2
  %x3 = extractelement <32 x i8> %0, i32 3
  %x4 = extractelement <32 x i8> %0, i32 4
  %x5 = extractelement <32 x i8> %0, i32 5
  %x6 = extractelement <32 x i8> %0, i32 6
  %x7 = extractelement <32 x i8> %0, i32 7
  %x8 = extractelement <32 x i8> %0, i32 8
  %x9 = extractelement <32 x i8> %0, i32 9
  %x10 = extractelement <32 x i8> %0, i32 10
  %x11 = extractelement <32 x i8> %0, i32 11
  %x12 = extractelement <32 x i8> %0, i32 12
  %x13 = extractelement <32 x i8> %0, i32 13
  %x14 = extractelement <32 x i8> %0, i32 14
  %x15 = extractelement <32 x i8> %0, i32 15
  %x16 = extractelement <32 x i8> %0, i32 16
  %x17 = extractelement <32 x i8> %0, i32 17
  %x18 = extractelement <32 x i8> %0, i32 18
  %x19 = extractelement <32 x i8> %0, i32 19
  %x20 = extractelement <32 x i8> %0, i32 20
  %x21 = extractelement <32 x i8> %0, i32 21
  %x22 = extractelement <32 x i8> %0, i32 22
  %x23 = extractelement <32 x i8> %0, i32 23
  %x24 = extractelement <32 x i8> %0, i32 24
  %x25 = extractelement <32 x i8> %0, i32 25
  %x26 = extractelement <32 x i8> %0, i32 26
  %x27 = extractelement <32 x i8> %0, i32 27
  %x28 = extractelement <32 x i8> %0, i32 28
  %x29 = extractelement <32 x i8> %0, i32 29
  %x30 = extractelement <32 x i8> %0, i32 30
  %x31 = extractelement <32 x i8> %0, i32 31
  %trunc0 = trunc i8 %x0 to i4
  %trunc1 = trunc i8 %x1 to i4
  %trunc2 = trunc i8 %x2 to i4
  %trunc3 = trunc i8 %x3 to i4
  %trunc4 = trunc i8 %x4 to i4
  %trunc5 = trunc i8 %x5 to i4
  %trunc6 = trunc i8 %x6 to i4
  %trunc7 = trunc i8 %x7 to i4
  %trunc8 = trunc i8 %x8 to i4
  %trunc9 = trunc i8 %x9 to i4
  %trunc10 = trunc i8 %x10 to i4
  %trunc11 = trunc i8 %x11 to i4
  %trunc12 = trunc i8 %x12 to i4
  %trunc13 = trunc i8 %x13 to i4
  %trunc14 = trunc i8 %x14 to i4
  %trunc15 = trunc i8 %x15 to i4
  %trunc16 = trunc i8 %x16 to i4
  %trunc17 = trunc i8 %x17 to i4
  %trunc18 = trunc i8 %x18 to i4
  %trunc19 = trunc i8 %x19 to i4
  %trunc20 = trunc i8 %x20 to i4
  %trunc21 = trunc i8 %x21 to i4
  %trunc22 = trunc i8 %x22 to i4
  %trunc23 = trunc i8 %x23 to i4
  %trunc24 = trunc i8 %x24 to i4
  %trunc25 = trunc i8 %x25 to i4
  %trunc26 = trunc i8 %x26 to i4
  %trunc27 = trunc i8 %x27 to i4
  %trunc28 = trunc i8 %x28 to i4
  %trunc29 = trunc i8 %x29 to i4
  %trunc30 = trunc i8 %x30 to i4
  %trunc31 = trunc i8 %x31 to i4
  %ext0 = zext i4 %trunc0 to i8
  %ext1 = zext i4 %trunc1 to i8
  %ext2 = zext i4 %trunc2 to i8
  %ext3 = zext i4 %trunc3 to i8
  %ext4 = zext i4 %trunc4 to i8
  %ext5 = zext i4 %trunc5 to i8
  %ext6 = zext i4 %trunc6 to i8
  %ext7 = zext i4 %trunc7 to i8
  %ext8 = zext i4 %trunc8 to i8
  %ext9 = zext i4 %trunc9 to i8
  %ext10 = zext i4 %trunc10 to i8
  %ext11 = zext i4 %trunc11 to i8
  %ext12 = zext i4 %trunc12 to i8
  %ext13 = zext i4 %trunc13 to i8
  %ext14 = zext i4 %trunc14 to i8
  %ext15 = zext i4 %trunc15 to i8
  %ext16 = zext i4 %trunc16 to i8
  %ext17 = zext i4 %trunc17 to i8
  %ext18 = zext i4 %trunc18 to i8
  %ext19 = zext i4 %trunc19 to i8
  %ext20 = zext i4 %trunc20 to i8
  %ext21 = zext i4 %trunc21 to i8
  %ext22 = zext i4 %trunc22 to i8
  %ext23 = zext i4 %trunc23 to i8
  %ext24 = zext i4 %trunc24 to i8
  %ext25 = zext i4 %trunc25 to i8
  %ext26 = zext i4 %trunc26 to i8
  %ext27 = zext i4 %trunc27 to i8
  %ext28 = zext i4 %trunc28 to i8
  %ext29 = zext i4 %trunc29 to i8
  %ext30 = zext i4 %trunc30 to i8
  %ext31 = zext i4 %trunc31 to i8
  %v0 = insertelement <32 x i8> undef, i8 %ext0, i32 0
  %v1 = insertelement <32 x i8> %v0, i8 %ext1, i32 1
  %v2 = insertelement <32 x i8> %v1, i8 %ext2, i32 2
  %v3 = insertelement <32 x i8> %v2, i8 %ext3, i32 3
  %v4 = insertelement <32 x i8> %v3, i8 %ext4, i32 4
  %v5 = insertelement <32 x i8> %v4, i8 %ext5, i32 5
  %v6 = insertelement <32 x i8> %v5, i8 %ext6, i32 6
  %v7 = insertelement <32 x i8> %v6, i8 %ext7, i32 7
  %v8 = insertelement <32 x i8> %v7, i8 %ext8, i32 8
  %v9 = insertelement <32 x i8> %v8, i8 %ext9, i32 9
  %v10 = insertelement <32 x i8> %v9, i8 %ext10, i32 10
  %v11 = insertelement <32 x i8> %v10, i8 %ext11, i32 11
  %v12 = insertelement <32 x i8> %v11, i8 %ext12, i32 12
  %v13 = insertelement <32 x i8> %v12, i8 %ext13, i32 13
  %v14 = insertelement <32 x i8> %v13, i8 %ext14, i32 14
  %v15 = insertelement <32 x i8> %v14, i8 %ext15, i32 15
  %v16 = insertelement <32 x i8> %v15, i8 %ext16, i32 16
  %v17 = insertelement <32 x i8> %v16, i8 %ext17, i32 17
  %v18 = insertelement <32 x i8> %v17, i8 %ext18, i32 18
  %v19 = insertelement <32 x i8> %v18, i8 %ext19, i32 19
  %v20 = insertelement <32 x i8> %v19, i8 %ext20, i32 20
  %v21 = insertelement <32 x i8> %v20, i8 %ext21, i32 21
  %v22 = insertelement <32 x i8> %v21, i8 %ext22, i32 22
  %v23 = insertelement <32 x i8> %v22, i8 %ext23, i32 23
  %v24 = insertelement <32 x i8> %v23, i8 %ext24, i32 24
  %v25 = insertelement <32 x i8> %v24, i8 %ext25, i32 25
  %v26 = insertelement <32 x i8> %v25, i8 %ext26, i32 26
  %v27 = insertelement <32 x i8> %v26, i8 %ext27, i32 27
  %v28 = insertelement <32 x i8> %v27, i8 %ext28, i32 28
  %v29 = insertelement <32 x i8> %v28, i8 %ext29, i32 29
  %v30 = insertelement <32 x i8> %v29, i8 %ext30, i32 30
  %v31 = insertelement <32 x i8> %v30, i8 %ext31, i32 31
  ret <32 x i8> %v31
}

define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind {
; SSE2-LABEL: _clearupper2xi64b:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: _clearupper2xi64b:
; SSE42:       # %bb.0:
; SSE42-NEXT:    xorps %xmm1, %xmm1
; SSE42-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE42-NEXT:    retq
;
; AVX-LABEL: _clearupper2xi64b:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT:    retq
  %x32 = bitcast <2 x i64> %0 to <4 x i32>
  %r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <4 x i32> %r0, i32 zeroinitializer, i32 3
  %r = bitcast <4 x i32> %r1 to <2 x i64>
  ret <2 x i64> %r
}

define <4 x i64> @_clearupper4xi64b(<4 x i64>) nounwind {
; SSE2-LABEL: _clearupper4xi64b:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps {{.*#+}} xmm2
; SSE2-NEXT:    andps %xmm2, %xmm0
; SSE2-NEXT:    andps %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: _clearupper4xi64b:
; SSE42:       # %bb.0:
; SSE42-NEXT:    xorps %xmm2, %xmm2
; SSE42-NEXT:    blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE42-NEXT:    blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SSE42-NEXT:    retq
;
; AVX-LABEL: _clearupper4xi64b:
; AVX:       # %bb.0:
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX-NEXT:    retq
  %x32 = bitcast <4 x i64> %0 to <8 x i32>
  %r0 = insertelement <8 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <8 x i32> %r0, i32 zeroinitializer, i32 3
  %r2 = insertelement <8 x i32> %r1, i32 zeroinitializer, i32 5
  %r3 = insertelement <8 x i32> %r2, i32 zeroinitializer, i32 7
  %r = bitcast <8 x i32> %r3 to <4 x i64>
  ret <4 x i64> %r
}

define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
; SSE2-LABEL: _clearupper4xi32b:
; SSE2:       # %bb.0:
; SSE2-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE2-NEXT:    retq
;
; SSE42-LABEL: _clearupper4xi32b:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pxor %xmm1, %xmm1
; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; SSE42-NEXT:    retq
;
; AVX-LABEL: _clearupper4xi32b:
; AVX:       # %bb.0:
; AVX-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT:    retq
  %x16 = bitcast <4 x i32> %0 to <8 x i16>
  %r0 = insertelement <8 x i16> %x16, i16 zeroinitializer, i32 1
  %r1 = insertelement <8 x i16> %r0, i16 zeroinitializer, i32 3
  %r2 = insertelement <8 x i16> %r1, i16 zeroinitializer, i32 5
  %r3 = insertelement <8 x i16> %r2, i16 zeroinitializer, i32 7
  %r = bitcast <8 x i16> %r3 to <4 x i32>
  ret <4 x i32> %r
}

define <8 x i32> @_clearupper8xi32b(<8 x i32>) nounwind {
; SSE2-LABEL: _clearupper8xi32b:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE2-NEXT:    andps %xmm2, %xmm0
; SSE2-NEXT:    andps %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; SSE42-LABEL: _clearupper8xi32b:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pxor %xmm2, %xmm2
; SSE42-NEXT:    pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE42-NEXT:    pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE42-NEXT:    retq
;
; AVX1-LABEL: _clearupper8xi32b:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: _clearupper8xi32b:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT:    vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT:    retq
  %x16 = bitcast <8 x i32> %0 to <16 x i16>
  %r0 = insertelement <16 x i16> %x16, i16 zeroinitializer, i32 1
  %r1 = insertelement <16 x i16> %r0, i16 zeroinitializer, i32 3
  %r2 = insertelement <16 x i16> %r1, i16 zeroinitializer, i32 5
  %r3 = insertelement <16 x i16> %r2, i16 zeroinitializer, i32 7
  %r4 = insertelement <16 x i16> %r3, i16 zeroinitializer, i32 9
  %r5 = insertelement <16 x i16> %r4, i16 zeroinitializer, i32 11
  %r6 = insertelement <16 x i16> %r5, i16 zeroinitializer, i32 13
  %r7 = insertelement <16 x i16> %r6, i16 zeroinitializer, i32 15
  %r = bitcast <16 x i16> %r7 to <8 x i32>
  ret <8 x i32> %r
}

define <8 x i16> @_clearupper8xi16b(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16b:
; SSE:       # %bb.0:
; SSE-NEXT:    andps {{.*}}(%rip), %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper8xi16b:
; AVX:       # %bb.0:
; AVX-NEXT:    vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
  %x8 = bitcast <8 x i16> %0 to <16 x i8>
  %r0 = insertelement <16 x i8> %x8, i8 zeroinitializer, i32 1
  %r1 = insertelement <16 x i8> %r0, i8 zeroinitializer, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 zeroinitializer, i32 5
  %r3 = insertelement <16 x i8> %r2, i8 zeroinitializer, i32 7
  %r4 = insertelement <16 x i8> %r3, i8 zeroinitializer, i32 9
  %r5 = insertelement <16 x i8> %r4, i8 zeroinitializer, i32 11
  %r6 = insertelement <16 x i8> %r5, i8 zeroinitializer, i32 13
  %r7 = insertelement <16 x i8> %r6, i8 zeroinitializer, i32 15
  %r = bitcast <16 x i8> %r7 to <8 x i16>
  ret <8 x i16> %r
}

define <16 x i16> @_clearupper16xi16b(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16b:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT:    andps %xmm2, %xmm0
; SSE-NEXT:    andps %xmm2, %xmm1
; SSE-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi16b:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm2
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT:    vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX-NEXT:    retq
  %x8 = bitcast <16 x i16> %0 to <32 x i8>
  %r0 = insertelement <32 x i8> %x8, i8 zeroinitializer, i32 1
  %r1 = insertelement <32 x i8> %r0, i8 zeroinitializer, i32 3
  %r2 = insertelement <32 x i8> %r1, i8 zeroinitializer, i32 5
  %r3 = insertelement <32 x i8> %r2, i8 zeroinitializer, i32 7
  %r4 = insertelement <32 x i8> %r3, i8 zeroinitializer, i32 9
  %r5 = insertelement <32 x i8> %r4, i8 zeroinitializer, i32 11
  %r6 = insertelement <32 x i8> %r5, i8 zeroinitializer, i32 13
  %r7 = insertelement <32 x i8> %r6, i8 zeroinitializer, i32 15
  %r8 = insertelement <32 x i8> %r7, i8 zeroinitializer, i32 17
  %r9 = insertelement <32 x i8> %r8, i8 zeroinitializer, i32 19
  %r10 = insertelement <32 x i8> %r9, i8 zeroinitializer, i32 21
  %r11 = insertelement <32 x i8> %r10, i8 zeroinitializer, i32 23
  %r12 = insertelement <32 x i8> %r11, i8 zeroinitializer, i32 25
  %r13 = insertelement <32 x i8> %r12, i8 zeroinitializer, i32 27
  %r14 = insertelement <32 x i8> %r13, i8 zeroinitializer, i32 29
  %r15 = insertelement <32 x i8> %r14, i8 zeroinitializer, i32 31
  %r = bitcast <32 x i8> %r15 to <16 x i16>
  ret <16 x i16> %r
}

define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
; SSE2-LABEL: _clearupper16xi8b:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT:    movq %xmm1, %r10
; SSE2-NEXT:    movq %r10, %r8
; SSE2-NEXT:    shrq $56, %r8
; SSE2-NEXT:    andl $15, %r8d
; SSE2-NEXT:    movq %r10, %r9
; SSE2-NEXT:    shrq $48, %r9
; SSE2-NEXT:    andl $15, %r9d
; SSE2-NEXT:    movq %r10, %rsi
; SSE2-NEXT:    shrq $40, %rsi
; SSE2-NEXT:    andl $15, %esi
; SSE2-NEXT:    movq %r10, %r11
; SSE2-NEXT:    shrq $32, %r11
; SSE2-NEXT:    andl $15, %r11d
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    movq %rax, %rdx
; SSE2-NEXT:    shrq $56, %rdx
; SSE2-NEXT:    andl $15, %edx
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq $48, %rcx
; SSE2-NEXT:    andl $15, %ecx
; SSE2-NEXT:    movq %rax, %rdi
; SSE2-NEXT:    shrq $40, %rdi
; SSE2-NEXT:    andl $15, %edi
; SSE2-NEXT:    movq %rax, %rbx
; SSE2-NEXT:    shrq $32, %rbx
; SSE2-NEXT:    andl $15, %ebx
; SSE2-NEXT:    shlq $32, %rbx
; SSE2-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; SSE2-NEXT:    orq %rbx, %rax
; SSE2-NEXT:    shlq $40, %rdi
; SSE2-NEXT:    orq %rax, %rdi
; SSE2-NEXT:    shlq $48, %rcx
; SSE2-NEXT:    orq %rdi, %rcx
; SSE2-NEXT:    shlq $56, %rdx
; SSE2-NEXT:    orq %rcx, %rdx
; SSE2-NEXT:    shlq $32, %r11
; SSE2-NEXT:    andl $252645135, %r10d # imm = 0xF0F0F0F
; SSE2-NEXT:    orq %r11, %r10
; SSE2-NEXT:    shlq $40, %rsi
; SSE2-NEXT:    orq %r10, %rsi
; SSE2-NEXT:    shlq $48, %r9
; SSE2-NEXT:    orq %rsi, %r9
; SSE2-NEXT:    shlq $56, %r8
; SSE2-NEXT:    orq %r9, %r8
; SSE2-NEXT:    movq %rdx, %xmm0
; SSE2-NEXT:    movq %r8, %xmm1
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    retq
;
; SSE42-LABEL: _clearupper16xi8b:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pushq %rbx
; SSE42-NEXT:    pextrq $1, %xmm0, %r10
; SSE42-NEXT:    movq %r10, %r8
; SSE42-NEXT:    shrq $56, %r8
; SSE42-NEXT:    andl $15, %r8d
; SSE42-NEXT:    movq %r10, %r9
; SSE42-NEXT:    shrq $48, %r9
; SSE42-NEXT:    andl $15, %r9d
; SSE42-NEXT:    movq %r10, %rsi
; SSE42-NEXT:    shrq $40, %rsi
; SSE42-NEXT:    andl $15, %esi
; SSE42-NEXT:    movq %r10, %r11
; SSE42-NEXT:    shrq $32, %r11
; SSE42-NEXT:    andl $15, %r11d
; SSE42-NEXT:    movq %xmm0, %rax
; SSE42-NEXT:    movq %rax, %rdx
; SSE42-NEXT:    shrq $56, %rdx
; SSE42-NEXT:    andl $15, %edx
; SSE42-NEXT:    movq %rax, %rcx
; SSE42-NEXT:    shrq $48, %rcx
; SSE42-NEXT:    andl $15, %ecx
; SSE42-NEXT:    movq %rax, %rdi
; SSE42-NEXT:    shrq $40, %rdi
; SSE42-NEXT:    andl $15, %edi
; SSE42-NEXT:    movq %rax, %rbx
; SSE42-NEXT:    shrq $32, %rbx
; SSE42-NEXT:    andl $15, %ebx
; SSE42-NEXT:    shlq $32, %rbx
; SSE42-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; SSE42-NEXT:    orq %rbx, %rax
; SSE42-NEXT:    shlq $40, %rdi
; SSE42-NEXT:    orq %rax, %rdi
; SSE42-NEXT:    shlq $48, %rcx
; SSE42-NEXT:    orq %rdi, %rcx
; SSE42-NEXT:    shlq $56, %rdx
; SSE42-NEXT:    orq %rcx, %rdx
; SSE42-NEXT:    shlq $32, %r11
; SSE42-NEXT:    andl $252645135, %r10d # imm = 0xF0F0F0F
; SSE42-NEXT:    orq %r11, %r10
; SSE42-NEXT:    shlq $40, %rsi
; SSE42-NEXT:    orq %r10, %rsi
; SSE42-NEXT:    shlq $48, %r9
; SSE42-NEXT:    orq %rsi, %r9
; SSE42-NEXT:    shlq $56, %r8
; SSE42-NEXT:    orq %r9, %r8
; SSE42-NEXT:    movq %r8, %xmm1
; SSE42-NEXT:    movq %rdx, %xmm0
; SSE42-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE42-NEXT:    popq %rbx
; SSE42-NEXT:    retq
;
; AVX-LABEL: _clearupper16xi8b:
; AVX:       # %bb.0:
; AVX-NEXT:    pushq %rbx
; AVX-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %r9
; AVX-NEXT:    movq -{{[0-9]+}}(%rsp), %rdx
; AVX-NEXT:    movq %r9, %r8
; AVX-NEXT:    shrq $56, %r8
; AVX-NEXT:    andl $15, %r8d
; AVX-NEXT:    movq %r9, %r10
; AVX-NEXT:    shrq $48, %r10
; AVX-NEXT:    andl $15, %r10d
; AVX-NEXT:    movq %r9, %rsi
; AVX-NEXT:    shrq $40, %rsi
; AVX-NEXT:    andl $15, %esi
; AVX-NEXT:    movq %r9, %r11
; AVX-NEXT:    shrq $32, %r11
; AVX-NEXT:    andl $15, %r11d
; AVX-NEXT:    movq %rdx, %rdi
; AVX-NEXT:    shrq $56, %rdi
; AVX-NEXT:    andl $15, %edi
; AVX-NEXT:    movq %rdx, %rax
; AVX-NEXT:    shrq $48, %rax
; AVX-NEXT:    andl $15, %eax
; AVX-NEXT:    movq %rdx, %rcx
; AVX-NEXT:    shrq $40, %rcx
; AVX-NEXT:    andl $15, %ecx
; AVX-NEXT:    movq %rdx, %rbx
; AVX-NEXT:    shrq $32, %rbx
; AVX-NEXT:    andl $15, %ebx
; AVX-NEXT:    shlq $32, %rbx
; AVX-NEXT:    andl $252645135, %edx # imm = 0xF0F0F0F
; AVX-NEXT:    orq %rbx, %rdx
; AVX-NEXT:    shlq $40, %rcx
; AVX-NEXT:    orq %rdx, %rcx
; AVX-NEXT:    shlq $48, %rax
; AVX-NEXT:    orq %rcx, %rax
; AVX-NEXT:    shlq $56, %rdi
; AVX-NEXT:    orq %rax, %rdi
; AVX-NEXT:    movq %rdi, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    shlq $32, %r11
; AVX-NEXT:    andl $252645135, %r9d # imm = 0xF0F0F0F
; AVX-NEXT:    orq %r11, %r9
; AVX-NEXT:    shlq $40, %rsi
; AVX-NEXT:    orq %r9, %rsi
; AVX-NEXT:    shlq $48, %r10
; AVX-NEXT:    orq %rsi, %r10
; AVX-NEXT:    shlq $56, %r8
; AVX-NEXT:    orq %r10, %r8
; AVX-NEXT:    movq %r8, -{{[0-9]+}}(%rsp)
; AVX-NEXT:    vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT:    popq %rbx
; AVX-NEXT:    retq
  %x4 = bitcast <16 x i8> %0 to <32 x i4>
  %r0 = insertelement <32 x i4> %x4, i4 zeroinitializer, i32 1
  %r1 = insertelement <32 x i4> %r0, i4 zeroinitializer, i32 3
  %r2 = insertelement <32 x i4> %r1, i4 zeroinitializer, i32 5
  %r3 = insertelement <32 x i4> %r2, i4 zeroinitializer, i32 7
  %r4 = insertelement <32 x i4> %r3, i4 zeroinitializer, i32 9
  %r5 = insertelement <32 x i4> %r4, i4 zeroinitializer, i32 11
  %r6 = insertelement <32 x i4> %r5, i4 zeroinitializer, i32 13
  %r7 = insertelement <32 x i4> %r6, i4 zeroinitializer, i32 15
  %r8 = insertelement <32 x i4> %r7, i4 zeroinitializer, i32 17
  %r9 = insertelement <32 x i4> %r8, i4 zeroinitializer, i32 19
  %r10 = insertelement <32 x i4> %r9, i4 zeroinitializer, i32 21
  %r11 = insertelement <32 x i4> %r10, i4 zeroinitializer, i32 23
  %r12 = insertelement <32 x i4> %r11, i4 zeroinitializer, i32 25
  %r13 = insertelement <32 x i4> %r12, i4 zeroinitializer, i32 27
  %r14 = insertelement <32 x i4> %r13, i4 zeroinitializer, i32 29
  %r15 = insertelement <32 x i4> %r14, i4 zeroinitializer, i32 31
  %r = bitcast <32 x i4> %r15 to <16 x i8>
  ret <16 x i8> %r
}

define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; SSE2-LABEL: _clearupper32xi8b:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pushq %rbx
; SSE2-NEXT:    pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE2-NEXT:    movq %xmm2, %r10
; SSE2-NEXT:    movq %r10, %r8
; SSE2-NEXT:    shrq $56, %r8
; SSE2-NEXT:    andl $15, %r8d
; SSE2-NEXT:    movq %r10, %r9
; SSE2-NEXT:    shrq $48, %r9
; SSE2-NEXT:    andl $15, %r9d
; SSE2-NEXT:    movq %r10, %rsi
; SSE2-NEXT:    shrq $40, %rsi
; SSE2-NEXT:    andl $15, %esi
; SSE2-NEXT:    movq %r10, %r11
; SSE2-NEXT:    shrq $32, %r11
; SSE2-NEXT:    andl $15, %r11d
; SSE2-NEXT:    movq %xmm0, %rax
; SSE2-NEXT:    movq %rax, %rdx
; SSE2-NEXT:    shrq $56, %rdx
; SSE2-NEXT:    andl $15, %edx
; SSE2-NEXT:    movq %rax, %rcx
; SSE2-NEXT:    shrq $48, %rcx
; SSE2-NEXT:    andl $15, %ecx
; SSE2-NEXT:    movq %rax, %rdi
; SSE2-NEXT:    shrq $40, %rdi
; SSE2-NEXT:    andl $15, %edi
; SSE2-NEXT:    movq %rax, %rbx
; SSE2-NEXT:    shrq $32, %rbx
; SSE2-NEXT:    andl $15, %ebx
; SSE2-NEXT:    shlq $32, %rbx
; SSE2-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; SSE2-NEXT:    orq %rbx, %rax
; SSE2-NEXT:    shlq $40, %rdi
; SSE2-NEXT:    orq %rax, %rdi
; SSE2-NEXT:    shlq $48, %rcx
; SSE2-NEXT:    orq %rdi, %rcx
; SSE2-NEXT:    shlq $56, %rdx
; SSE2-NEXT:    orq %rcx, %rdx
; SSE2-NEXT:    shlq $32, %r11
; SSE2-NEXT:    andl $252645135, %r10d # imm = 0xF0F0F0F
; SSE2-NEXT:    orq %r11, %r10
; SSE2-NEXT:    shlq $40, %rsi
; SSE2-NEXT:    orq %r10, %rsi
; SSE2-NEXT:    shlq $48, %r9
; SSE2-NEXT:    orq %rsi, %r9
; SSE2-NEXT:    shlq $56, %r8
; SSE2-NEXT:    orq %r9, %r8
; SSE2-NEXT:    movq %rdx, %xmm0
; SSE2-NEXT:    movq %r8, %xmm2
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT:    popq %rbx
; SSE2-NEXT:    retq
;
; SSE42-LABEL: _clearupper32xi8b:
; SSE42:       # %bb.0:
; SSE42-NEXT:    pushq %rbx
; SSE42-NEXT:    pextrq $1, %xmm0, %r10
; SSE42-NEXT:    movq %r10, %r8
; SSE42-NEXT:    shrq $56, %r8
; SSE42-NEXT:    andl $15, %r8d
; SSE42-NEXT:    movq %r10, %r9
; SSE42-NEXT:    shrq $48, %r9
; SSE42-NEXT:    andl $15, %r9d
; SSE42-NEXT:    movq %r10, %rsi
; SSE42-NEXT:    shrq $40, %rsi
; SSE42-NEXT:    andl $15, %esi
; SSE42-NEXT:    movq %r10, %r11
; SSE42-NEXT:    shrq $32, %r11
; SSE42-NEXT:    andl $15, %r11d
; SSE42-NEXT:    movq %xmm0, %rax
; SSE42-NEXT:    movq %rax, %rdx
; SSE42-NEXT:    shrq $56, %rdx
; SSE42-NEXT:    andl $15, %edx
; SSE42-NEXT:    movq %rax, %rcx
; SSE42-NEXT:    shrq $48, %rcx
; SSE42-NEXT:    andl $15, %ecx
; SSE42-NEXT:    movq %rax, %rdi
; SSE42-NEXT:    shrq $40, %rdi
; SSE42-NEXT:    andl $15, %edi
; SSE42-NEXT:    movq %rax, %rbx
; SSE42-NEXT:    shrq $32, %rbx
; SSE42-NEXT:    andl $15, %ebx
; SSE42-NEXT:    shlq $32, %rbx
; SSE42-NEXT:    andl $252645135, %eax # imm = 0xF0F0F0F
; SSE42-NEXT:    orq %rbx, %rax
; SSE42-NEXT:    shlq $40, %rdi
; SSE42-NEXT:    orq %rax, %rdi
; SSE42-NEXT:    shlq $48, %rcx
; SSE42-NEXT:    orq %rdi, %rcx
; SSE42-NEXT:    shlq $56, %rdx
; SSE42-NEXT:    orq %rcx, %rdx
; SSE42-NEXT:    shlq $32, %r11
; SSE42-NEXT:    andl $252645135, %r10d # imm = 0xF0F0F0F
; SSE42-NEXT:    orq %r11, %r10
; SSE42-NEXT:    shlq $40, %rsi
; SSE42-NEXT:    orq %r10, %rsi
; SSE42-NEXT:    shlq $48, %r9
; SSE42-NEXT:    orq %rsi, %r9
; SSE42-NEXT:    shlq $56, %r8
; SSE42-NEXT:    orq %r9, %r8
; SSE42-NEXT:    movq %r8, %xmm2
980; SSE42-NEXT: movq %rdx, %xmm0 981; SSE42-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 982; SSE42-NEXT: popq %rbx 983; SSE42-NEXT: retq 984; 985; AVX1-LABEL: _clearupper32xi8b: 986; AVX1: # %bb.0: 987; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 988; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %r9 989; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rcx 990; AVX1-NEXT: movq %r9, %r8 991; AVX1-NEXT: shrq $56, %r8 992; AVX1-NEXT: andl $15, %r8d 993; AVX1-NEXT: movq %rcx, %rsi 994; AVX1-NEXT: movq %rcx, %rdi 995; AVX1-NEXT: movq %rcx, %rdx 996; AVX1-NEXT: movq %rcx, %rax 997; AVX1-NEXT: shrq $32, %rax 998; AVX1-NEXT: andl $15, %eax 999; AVX1-NEXT: shlq $32, %rax 1000; AVX1-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 1001; AVX1-NEXT: orq %rax, %rcx 1002; AVX1-NEXT: movq %r9, %rax 1003; AVX1-NEXT: shrq $48, %rax 1004; AVX1-NEXT: andl $15, %eax 1005; AVX1-NEXT: shrq $40, %rdx 1006; AVX1-NEXT: andl $15, %edx 1007; AVX1-NEXT: shlq $40, %rdx 1008; AVX1-NEXT: orq %rcx, %rdx 1009; AVX1-NEXT: movq %r9, %rcx 1010; AVX1-NEXT: shrq $40, %rcx 1011; AVX1-NEXT: andl $15, %ecx 1012; AVX1-NEXT: shrq $48, %rdi 1013; AVX1-NEXT: andl $15, %edi 1014; AVX1-NEXT: shlq $48, %rdi 1015; AVX1-NEXT: orq %rdx, %rdi 1016; AVX1-NEXT: movq %r9, %rdx 1017; AVX1-NEXT: shrq $32, %rdx 1018; AVX1-NEXT: andl $15, %edx 1019; AVX1-NEXT: shrq $56, %rsi 1020; AVX1-NEXT: andl $15, %esi 1021; AVX1-NEXT: shlq $56, %rsi 1022; AVX1-NEXT: orq %rdi, %rsi 1023; AVX1-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) 1024; AVX1-NEXT: shlq $32, %rdx 1025; AVX1-NEXT: andl $252645135, %r9d # imm = 0xF0F0F0F 1026; AVX1-NEXT: orq %rdx, %r9 1027; AVX1-NEXT: shlq $40, %rcx 1028; AVX1-NEXT: orq %r9, %rcx 1029; AVX1-NEXT: shlq $48, %rax 1030; AVX1-NEXT: orq %rcx, %rax 1031; AVX1-NEXT: shlq $56, %r8 1032; AVX1-NEXT: orq %rax, %r8 1033; AVX1-NEXT: movq %r8, -{{[0-9]+}}(%rsp) 1034; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1035; AVX1-NEXT: vmovq %xmm0, %rax 1036; AVX1-NEXT: movq %rax, %r8 1037; AVX1-NEXT: movq %rax, %r9 1038; AVX1-NEXT: movq %rax, %rsi 1039; 
AVX1-NEXT: movq %rax, %rdi 1040; AVX1-NEXT: movl %eax, %ecx 1041; AVX1-NEXT: movl %eax, %edx 1042; AVX1-NEXT: vmovd %eax, %xmm1 1043; AVX1-NEXT: shrl $8, %eax 1044; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1045; AVX1-NEXT: shrl $16, %edx 1046; AVX1-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1 1047; AVX1-NEXT: shrl $24, %ecx 1048; AVX1-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 1049; AVX1-NEXT: shrq $32, %rdi 1050; AVX1-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1 1051; AVX1-NEXT: shrq $40, %rsi 1052; AVX1-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1 1053; AVX1-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm2 1054; AVX1-NEXT: shrq $48, %r9 1055; AVX1-NEXT: vpinsrb $6, %r9d, %xmm1, %xmm1 1056; AVX1-NEXT: vpextrq $1, %xmm0, %rax 1057; AVX1-NEXT: shrq $56, %r8 1058; AVX1-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0 1059; AVX1-NEXT: movl %eax, %ecx 1060; AVX1-NEXT: shrl $8, %ecx 1061; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 1062; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 1063; AVX1-NEXT: movl %eax, %ecx 1064; AVX1-NEXT: shrl $16, %ecx 1065; AVX1-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 1066; AVX1-NEXT: movl %eax, %ecx 1067; AVX1-NEXT: shrl $24, %ecx 1068; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 1069; AVX1-NEXT: movq %rax, %rcx 1070; AVX1-NEXT: shrq $32, %rcx 1071; AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 1072; AVX1-NEXT: movq %rax, %rcx 1073; AVX1-NEXT: shrq $40, %rcx 1074; AVX1-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 1075; AVX1-NEXT: movq %rax, %rcx 1076; AVX1-NEXT: shrq $48, %rcx 1077; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 1078; AVX1-NEXT: vmovq %xmm2, %rcx 1079; AVX1-NEXT: shrq $56, %rax 1080; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 1081; AVX1-NEXT: movl %ecx, %eax 1082; AVX1-NEXT: shrl $8, %eax 1083; AVX1-NEXT: vmovd %ecx, %xmm1 1084; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1085; AVX1-NEXT: movl %ecx, %eax 1086; AVX1-NEXT: shrl $16, %eax 1087; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 1088; AVX1-NEXT: movl %ecx, %eax 1089; AVX1-NEXT: shrl $24, %eax 1090; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 
1091; AVX1-NEXT: movq %rcx, %rax 1092; AVX1-NEXT: shrq $32, %rax 1093; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 1094; AVX1-NEXT: movq %rcx, %rax 1095; AVX1-NEXT: shrq $40, %rax 1096; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 1097; AVX1-NEXT: movq %rcx, %rax 1098; AVX1-NEXT: shrq $48, %rax 1099; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 1100; AVX1-NEXT: vpextrq $1, %xmm2, %rax 1101; AVX1-NEXT: shrq $56, %rcx 1102; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 1103; AVX1-NEXT: movl %eax, %ecx 1104; AVX1-NEXT: shrl $8, %ecx 1105; AVX1-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 1106; AVX1-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 1107; AVX1-NEXT: movl %eax, %ecx 1108; AVX1-NEXT: shrl $16, %ecx 1109; AVX1-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 1110; AVX1-NEXT: movl %eax, %ecx 1111; AVX1-NEXT: shrl $24, %ecx 1112; AVX1-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 1113; AVX1-NEXT: movq %rax, %rcx 1114; AVX1-NEXT: shrq $32, %rcx 1115; AVX1-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 1116; AVX1-NEXT: movq %rax, %rcx 1117; AVX1-NEXT: shrq $40, %rcx 1118; AVX1-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 1119; AVX1-NEXT: movq %rax, %rcx 1120; AVX1-NEXT: shrq $48, %rcx 1121; AVX1-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 1122; AVX1-NEXT: shrq $56, %rax 1123; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 1124; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1125; AVX1-NEXT: retq 1126; 1127; AVX2-LABEL: _clearupper32xi8b: 1128; AVX2: # %bb.0: 1129; AVX2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) 1130; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %r9 1131; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rcx 1132; AVX2-NEXT: movq %r9, %r8 1133; AVX2-NEXT: shrq $56, %r8 1134; AVX2-NEXT: andl $15, %r8d 1135; AVX2-NEXT: movq %rcx, %rsi 1136; AVX2-NEXT: movq %rcx, %rdi 1137; AVX2-NEXT: movq %rcx, %rdx 1138; AVX2-NEXT: movq %rcx, %rax 1139; AVX2-NEXT: shrq $32, %rax 1140; AVX2-NEXT: andl $15, %eax 1141; AVX2-NEXT: shlq $32, %rax 1142; AVX2-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 1143; AVX2-NEXT: orq %rax, %rcx 1144; AVX2-NEXT: movq %r9, %rax 1145; 
AVX2-NEXT: shrq $48, %rax 1146; AVX2-NEXT: andl $15, %eax 1147; AVX2-NEXT: shrq $40, %rdx 1148; AVX2-NEXT: andl $15, %edx 1149; AVX2-NEXT: shlq $40, %rdx 1150; AVX2-NEXT: orq %rcx, %rdx 1151; AVX2-NEXT: movq %r9, %rcx 1152; AVX2-NEXT: shrq $40, %rcx 1153; AVX2-NEXT: andl $15, %ecx 1154; AVX2-NEXT: shrq $48, %rdi 1155; AVX2-NEXT: andl $15, %edi 1156; AVX2-NEXT: shlq $48, %rdi 1157; AVX2-NEXT: orq %rdx, %rdi 1158; AVX2-NEXT: movq %r9, %rdx 1159; AVX2-NEXT: shrq $32, %rdx 1160; AVX2-NEXT: andl $15, %edx 1161; AVX2-NEXT: shrq $56, %rsi 1162; AVX2-NEXT: andl $15, %esi 1163; AVX2-NEXT: shlq $56, %rsi 1164; AVX2-NEXT: orq %rdi, %rsi 1165; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) 1166; AVX2-NEXT: shlq $32, %rdx 1167; AVX2-NEXT: andl $252645135, %r9d # imm = 0xF0F0F0F 1168; AVX2-NEXT: orq %rdx, %r9 1169; AVX2-NEXT: shlq $40, %rcx 1170; AVX2-NEXT: orq %r9, %rcx 1171; AVX2-NEXT: shlq $48, %rax 1172; AVX2-NEXT: orq %rcx, %rax 1173; AVX2-NEXT: shlq $56, %r8 1174; AVX2-NEXT: orq %rax, %r8 1175; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) 1176; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1177; AVX2-NEXT: vmovq %xmm0, %rax 1178; AVX2-NEXT: movq %rax, %r8 1179; AVX2-NEXT: movq %rax, %r9 1180; AVX2-NEXT: movq %rax, %rsi 1181; AVX2-NEXT: movq %rax, %rdi 1182; AVX2-NEXT: movl %eax, %ecx 1183; AVX2-NEXT: movl %eax, %edx 1184; AVX2-NEXT: vmovd %eax, %xmm1 1185; AVX2-NEXT: shrl $8, %eax 1186; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1187; AVX2-NEXT: shrl $16, %edx 1188; AVX2-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1 1189; AVX2-NEXT: shrl $24, %ecx 1190; AVX2-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 1191; AVX2-NEXT: shrq $32, %rdi 1192; AVX2-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1 1193; AVX2-NEXT: shrq $40, %rsi 1194; AVX2-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1 1195; AVX2-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm2 1196; AVX2-NEXT: shrq $48, %r9 1197; AVX2-NEXT: vpinsrb $6, %r9d, %xmm1, %xmm1 1198; AVX2-NEXT: vpextrq $1, %xmm0, %rax 1199; AVX2-NEXT: shrq $56, %r8 1200; AVX2-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0 
1201; AVX2-NEXT: movl %eax, %ecx 1202; AVX2-NEXT: shrl $8, %ecx 1203; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 1204; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 1205; AVX2-NEXT: movl %eax, %ecx 1206; AVX2-NEXT: shrl $16, %ecx 1207; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 1208; AVX2-NEXT: movl %eax, %ecx 1209; AVX2-NEXT: shrl $24, %ecx 1210; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 1211; AVX2-NEXT: movq %rax, %rcx 1212; AVX2-NEXT: shrq $32, %rcx 1213; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 1214; AVX2-NEXT: movq %rax, %rcx 1215; AVX2-NEXT: shrq $40, %rcx 1216; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 1217; AVX2-NEXT: movq %rax, %rcx 1218; AVX2-NEXT: shrq $48, %rcx 1219; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 1220; AVX2-NEXT: vmovq %xmm2, %rcx 1221; AVX2-NEXT: shrq $56, %rax 1222; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 1223; AVX2-NEXT: movl %ecx, %eax 1224; AVX2-NEXT: shrl $8, %eax 1225; AVX2-NEXT: vmovd %ecx, %xmm1 1226; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1227; AVX2-NEXT: movl %ecx, %eax 1228; AVX2-NEXT: shrl $16, %eax 1229; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 1230; AVX2-NEXT: movl %ecx, %eax 1231; AVX2-NEXT: shrl $24, %eax 1232; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 1233; AVX2-NEXT: movq %rcx, %rax 1234; AVX2-NEXT: shrq $32, %rax 1235; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 1236; AVX2-NEXT: movq %rcx, %rax 1237; AVX2-NEXT: shrq $40, %rax 1238; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 1239; AVX2-NEXT: movq %rcx, %rax 1240; AVX2-NEXT: shrq $48, %rax 1241; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 1242; AVX2-NEXT: vpextrq $1, %xmm2, %rax 1243; AVX2-NEXT: shrq $56, %rcx 1244; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 1245; AVX2-NEXT: movl %eax, %ecx 1246; AVX2-NEXT: shrl $8, %ecx 1247; AVX2-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 1248; AVX2-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 1249; AVX2-NEXT: movl %eax, %ecx 1250; AVX2-NEXT: shrl $16, %ecx 1251; AVX2-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 1252; AVX2-NEXT: movl %eax, %ecx 1253; 
AVX2-NEXT: shrl $24, %ecx 1254; AVX2-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 1255; AVX2-NEXT: movq %rax, %rcx 1256; AVX2-NEXT: shrq $32, %rcx 1257; AVX2-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 1258; AVX2-NEXT: movq %rax, %rcx 1259; AVX2-NEXT: shrq $40, %rcx 1260; AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 1261; AVX2-NEXT: movq %rax, %rcx 1262; AVX2-NEXT: shrq $48, %rcx 1263; AVX2-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 1264; AVX2-NEXT: shrq $56, %rax 1265; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 1266; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1267; AVX2-NEXT: retq 1268 %x4 = bitcast <32 x i8> %0 to <64 x i4> 1269 %r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1 1270 %r1 = insertelement <64 x i4> %r0, i4 zeroinitializer, i32 3 1271 %r2 = insertelement <64 x i4> %r1, i4 zeroinitializer, i32 5 1272 %r3 = insertelement <64 x i4> %r2, i4 zeroinitializer, i32 7 1273 %r4 = insertelement <64 x i4> %r3, i4 zeroinitializer, i32 9 1274 %r5 = insertelement <64 x i4> %r4, i4 zeroinitializer, i32 11 1275 %r6 = insertelement <64 x i4> %r5, i4 zeroinitializer, i32 13 1276 %r7 = insertelement <64 x i4> %r6, i4 zeroinitializer, i32 15 1277 %r8 = insertelement <64 x i4> %r7, i4 zeroinitializer, i32 17 1278 %r9 = insertelement <64 x i4> %r8, i4 zeroinitializer, i32 19 1279 %r10 = insertelement <64 x i4> %r9, i4 zeroinitializer, i32 21 1280 %r11 = insertelement <64 x i4> %r10, i4 zeroinitializer, i32 23 1281 %r12 = insertelement <64 x i4> %r11, i4 zeroinitializer, i32 25 1282 %r13 = insertelement <64 x i4> %r12, i4 zeroinitializer, i32 27 1283 %r14 = insertelement <64 x i4> %r13, i4 zeroinitializer, i32 29 1284 %r15 = insertelement <64 x i4> %r14, i4 zeroinitializer, i32 31 1285 %r16 = insertelement <64 x i4> %r15, i4 zeroinitializer, i32 33 1286 %r17 = insertelement <64 x i4> %r16, i4 zeroinitializer, i32 35 1287 %r18 = insertelement <64 x i4> %r17, i4 zeroinitializer, i32 37 1288 %r19 = insertelement <64 x i4> %r18, i4 zeroinitializer, i32 39 1289 %r20 = insertelement 
<64 x i4> %r19, i4 zeroinitializer, i32 41 1290 %r21 = insertelement <64 x i4> %r20, i4 zeroinitializer, i32 43 1291 %r22 = insertelement <64 x i4> %r21, i4 zeroinitializer, i32 45 1292 %r23 = insertelement <64 x i4> %r22, i4 zeroinitializer, i32 47 1293 %r24 = insertelement <64 x i4> %r23, i4 zeroinitializer, i32 49 1294 %r25 = insertelement <64 x i4> %r24, i4 zeroinitializer, i32 51 1295 %r26 = insertelement <64 x i4> %r25, i4 zeroinitializer, i32 53 1296 %r27 = insertelement <64 x i4> %r26, i4 zeroinitializer, i32 55 1297 %r28 = insertelement <64 x i4> %r27, i4 zeroinitializer, i32 57 1298 %r29 = insertelement <64 x i4> %r28, i4 zeroinitializer, i32 59 1299 %r30 = insertelement <64 x i4> %r29, i4 zeroinitializer, i32 61 1300 %r31 = insertelement <64 x i4> %r30, i4 zeroinitializer, i32 63 1301 %r = bitcast <64 x i4> %r15 to <32 x i8> 1302 ret <32 x i8> %r 1303} 1304 1305define <2 x i64> @_clearupper2xi64c(<2 x i64>) nounwind { 1306; SSE2-LABEL: _clearupper2xi64c: 1307; SSE2: # %bb.0: 1308; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 1309; SSE2-NEXT: retq 1310; 1311; SSE42-LABEL: _clearupper2xi64c: 1312; SSE42: # %bb.0: 1313; SSE42-NEXT: xorps %xmm1, %xmm1 1314; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 1315; SSE42-NEXT: retq 1316; 1317; AVX-LABEL: _clearupper2xi64c: 1318; AVX: # %bb.0: 1319; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 1320; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 1321; AVX-NEXT: retq 1322 %r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0 1323 ret <2 x i64> %r 1324} 1325 1326define <4 x i64> @_clearupper4xi64c(<4 x i64>) nounwind { 1327; SSE2-LABEL: _clearupper4xi64c: 1328; SSE2: # %bb.0: 1329; SSE2-NEXT: movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0] 1330; SSE2-NEXT: andps %xmm2, %xmm0 1331; SSE2-NEXT: andps %xmm2, %xmm1 1332; SSE2-NEXT: retq 1333; 1334; SSE42-LABEL: _clearupper4xi64c: 1335; SSE42: # %bb.0: 1336; SSE42-NEXT: xorps %xmm2, %xmm2 1337; SSE42-NEXT: blendps {{.*#+}} xmm0 = 
xmm0[0],xmm2[1],xmm0[2],xmm2[3] 1338; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 1339; SSE42-NEXT: retq 1340; 1341; AVX-LABEL: _clearupper4xi64c: 1342; AVX: # %bb.0: 1343; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 1344; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1345; AVX-NEXT: retq 1346 %r = and <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>, %0 1347 ret <4 x i64> %r 1348} 1349 1350define <4 x i32> @_clearupper4xi32c(<4 x i32>) nounwind { 1351; SSE2-LABEL: _clearupper4xi32c: 1352; SSE2: # %bb.0: 1353; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 1354; SSE2-NEXT: retq 1355; 1356; SSE42-LABEL: _clearupper4xi32c: 1357; SSE42: # %bb.0: 1358; SSE42-NEXT: pxor %xmm1, %xmm1 1359; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 1360; SSE42-NEXT: retq 1361; 1362; AVX-LABEL: _clearupper4xi32c: 1363; AVX: # %bb.0: 1364; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 1365; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 1366; AVX-NEXT: retq 1367 %r = and <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, %0 1368 ret <4 x i32> %r 1369} 1370 1371define <8 x i32> @_clearupper8xi32c(<8 x i32>) nounwind { 1372; SSE2-LABEL: _clearupper8xi32c: 1373; SSE2: # %bb.0: 1374; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0] 1375; SSE2-NEXT: andps %xmm2, %xmm0 1376; SSE2-NEXT: andps %xmm2, %xmm1 1377; SSE2-NEXT: retq 1378; 1379; SSE42-LABEL: _clearupper8xi32c: 1380; SSE42: # %bb.0: 1381; SSE42-NEXT: pxor %xmm2, %xmm2 1382; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] 1383; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] 1384; SSE42-NEXT: retq 1385; 1386; AVX1-LABEL: _clearupper8xi32c: 1387; AVX1: # %bb.0: 1388; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 1389; AVX1-NEXT: retq 
1390; 1391; AVX2-LABEL: _clearupper8xi32c: 1392; AVX2: # %bb.0: 1393; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1394; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 1395; AVX2-NEXT: retq 1396 %r = and <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>, %0 1397 ret <8 x i32> %r 1398} 1399 1400define <8 x i16> @_clearupper8xi16c(<8 x i16>) nounwind { 1401; SSE-LABEL: _clearupper8xi16c: 1402; SSE: # %bb.0: 1403; SSE-NEXT: andps {{.*}}(%rip), %xmm0 1404; SSE-NEXT: retq 1405; 1406; AVX-LABEL: _clearupper8xi16c: 1407; AVX: # %bb.0: 1408; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 1409; AVX-NEXT: retq 1410 %r = and <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0 1411 ret <8 x i16> %r 1412} 1413 1414define <16 x i16> @_clearupper16xi16c(<16 x i16>) nounwind { 1415; SSE-LABEL: _clearupper16xi16c: 1416; SSE: # %bb.0: 1417; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 1418; SSE-NEXT: andps %xmm2, %xmm0 1419; SSE-NEXT: andps %xmm2, %xmm1 1420; SSE-NEXT: retq 1421; 1422; AVX-LABEL: _clearupper16xi16c: 1423; AVX: # %bb.0: 1424; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 1425; AVX-NEXT: retq 1426 %r = and <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0 1427 ret <16 x i16> %r 1428} 1429 1430define <16 x i8> @_clearupper16xi8c(<16 x i8>) nounwind { 1431; SSE-LABEL: _clearupper16xi8c: 1432; SSE: # %bb.0: 1433; SSE-NEXT: andps {{.*}}(%rip), %xmm0 1434; SSE-NEXT: retq 1435; 1436; AVX-LABEL: _clearupper16xi8c: 1437; AVX: # %bb.0: 1438; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 1439; AVX-NEXT: retq 1440 %r = and <16 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, 
%0 1441 ret <16 x i8> %r 1442} 1443 1444define <32 x i8> @_clearupper32xi8c(<32 x i8>) nounwind { 1445; SSE-LABEL: _clearupper32xi8c: 1446; SSE: # %bb.0: 1447; SSE-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1448; SSE-NEXT: andps %xmm2, %xmm0 1449; SSE-NEXT: andps %xmm2, %xmm1 1450; SSE-NEXT: retq 1451; 1452; AVX-LABEL: _clearupper32xi8c: 1453; AVX: # %bb.0: 1454; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 1455; AVX-NEXT: retq 1456 %r = and <32 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0 1457 ret <32 x i8> %r 1458} 1459