; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

;
; PR6455 'Clear Upper Bits' Patterns
;

define <2 x i64> @_clearupper2xi64a(<2 x i64>) nounwind {
; SSE2-LABEL: _clearupper2xi64a:
; SSE2: # %bb.0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper2xi64a:
; SSE42: # %bb.0:
; SSE42-NEXT: xorps %xmm1, %xmm1
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper2xi64a:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT: retq
  %x0 = extractelement <2 x i64> %0, i32 0
  %x1 = extractelement <2 x i64> %0, i32 1
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %ext1, i32 1
  ret <2 x i64> %v1
}

define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind {
; SSE2-LABEL: _clearupper4xi64a:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper4xi64a:
; SSE42: # %bb.0:
; SSE42-NEXT: xorps %xmm2, %xmm2
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper4xi64a:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX-NEXT: retq
  %x0 = extractelement <4 x i64> %0, i32 0
  %x1 = extractelement <4 x i64> %0, i32 1
  %x2 = extractelement <4 x i64> %0, i32 2
  %x3 = extractelement <4 x i64> %0, i32 3
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  %trunc2 = trunc i64 %x2 to i32
  %trunc3 = trunc i64 %x3 to i32
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  %ext2 = zext i32 %trunc2 to i64
  %ext3 = zext i32 %trunc3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %ext1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %ext2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %ext3, i32 3
  ret <4 x i64> %v3
}

define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind {
; SSE2-LABEL: _clearupper4xi32a:
; SSE2: # %bb.0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper4xi32a:
; SSE42: # %bb.0:
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper4xi32a:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT: retq
  %x0 = extractelement <4 x i32> %0, i32 0
  %x1 = extractelement <4 x i32> %0, i32 1
  %x2 = extractelement <4 x i32> %0, i32 2
  %x3 = extractelement <4 x i32> %0, i32 3
  %trunc0 = trunc i32 %x0 to i16
  %trunc1 = trunc i32 %x1 to i16
  %trunc2 = trunc i32 %x2 to i16
  %trunc3 = trunc i32 %x3 to i16
  %ext0 = zext i16 %trunc0 to i32
  %ext1 = zext i16 %trunc1 to i32
  %ext2 = zext i16 %trunc2 to i32
  %ext3 = zext i16 %trunc3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %ext1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %ext2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %ext3, i32 3
  ret <4 x i32> %v3
}

define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind {
; SSE2-LABEL: _clearupper8xi32a:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,65535,65535,65535]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper8xi32a:
; SSE42: # %bb.0:
; SSE42-NEXT: pxor %xmm2, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE42-NEXT: retq
;
; AVX1-LABEL: _clearupper8xi32a:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper8xi32a:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
  %x0 = extractelement <8 x i32> %0, i32 0
  %x1 = extractelement <8 x i32> %0, i32 1
  %x2 = extractelement <8 x i32> %0, i32 2
  %x3 = extractelement <8 x i32> %0, i32 3
  %x4 = extractelement <8 x i32> %0, i32 4
  %x5 = extractelement <8 x i32> %0, i32 5
  %x6 = extractelement <8 x i32> %0, i32 6
  %x7 = extractelement <8 x i32> %0, i32 7
  %trunc0 = trunc i32 %x0 to i16
  %trunc1 = trunc i32 %x1 to i16
  %trunc2 = trunc i32 %x2 to i16
  %trunc3 = trunc i32 %x3 to i16
  %trunc4 = trunc i32 %x4 to i16
  %trunc5 = trunc i32 %x5 to i16
  %trunc6 = trunc i32 %x6 to i16
  %trunc7 = trunc i32 %x7 to i16
  %ext0 = zext i16 %trunc0 to i32
  %ext1 = zext i16 %trunc1 to i32
  %ext2 = zext i16 %trunc2 to i32
  %ext3 = zext i16 %trunc3 to i32
  %ext4 = zext i16 %trunc4 to i32
  %ext5 = zext i16 %trunc5 to i32
  %ext6 = zext i16 %trunc6 to i32
  %ext7 = zext i16 %trunc7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %ext0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %ext1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %ext2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %ext3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %ext4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %ext5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %ext6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %ext7, i32 7
  ret <8 x i32> %v7
}

define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16a:
; SSE: # %bb.0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper8xi16a:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x0 = extractelement <8 x i16> %0, i32 0
  %x1 = extractelement <8 x i16> %0, i32 1
  %x2 = extractelement <8 x i16> %0, i32 2
  %x3 = extractelement <8 x i16> %0, i32 3
  %x4 = extractelement <8 x i16> %0, i32 4
  %x5 = extractelement <8 x i16> %0, i32 5
  %x6 = extractelement <8 x i16> %0, i32 6
  %x7 = extractelement <8 x i16> %0, i32 7
  %trunc0 = trunc i16 %x0 to i8
  %trunc1 = trunc i16 %x1 to i8
  %trunc2 = trunc i16 %x2 to i8
  %trunc3 = trunc i16 %x3 to i8
  %trunc4 = trunc i16 %x4 to i8
  %trunc5 = trunc i16 %x5 to i8
  %trunc6 = trunc i16 %x6 to i8
  %trunc7 = trunc i16 %x7 to i8
  %ext0 = zext i8 %trunc0 to i16
  %ext1 = zext i8 %trunc1 to i16
  %ext2 = zext i8 %trunc2 to i16
  %ext3 = zext i8 %trunc3 to i16
  %ext4 = zext i8 %trunc4 to i16
  %ext5 = zext i8 %trunc5 to i16
  %ext6 = zext i8 %trunc6 to i16
  %ext7 = zext i8 %trunc7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %ext0, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %ext1, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %ext2, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %ext3, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %ext4, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %ext5, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %ext6, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %ext7, i32 7
  ret <8 x i16> %v7
}

define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16a:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi16a:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
  %x0 = extractelement <16 x i16> %0, i32 0
  %x1 = extractelement <16 x i16> %0, i32 1
  %x2 = extractelement <16 x i16> %0, i32 2
  %x3 = extractelement <16 x i16> %0, i32 3
  %x4 = extractelement <16 x i16> %0, i32 4
  %x5 = extractelement <16 x i16> %0, i32 5
  %x6 = extractelement <16 x i16> %0, i32 6
  %x7 = extractelement <16 x i16> %0, i32 7
  %x8 = extractelement <16 x i16> %0, i32 8
  %x9 = extractelement <16 x i16> %0, i32 9
  %x10 = extractelement <16 x i16> %0, i32 10
  %x11 = extractelement <16 x i16> %0, i32 11
  %x12 = extractelement <16 x i16> %0, i32 12
  %x13 = extractelement <16 x i16> %0, i32 13
  %x14 = extractelement <16 x i16> %0, i32 14
  %x15 = extractelement <16 x i16> %0, i32 15
  %trunc0 = trunc i16 %x0 to i8
  %trunc1 = trunc i16 %x1 to i8
  %trunc2 = trunc i16 %x2 to i8
  %trunc3 = trunc i16 %x3 to i8
  %trunc4 = trunc i16 %x4 to i8
  %trunc5 = trunc i16 %x5 to i8
  %trunc6 = trunc i16 %x6 to i8
  %trunc7 = trunc i16 %x7 to i8
  %trunc8 = trunc i16 %x8 to i8
  %trunc9 = trunc i16 %x9 to i8
  %trunc10 = trunc i16 %x10 to i8
  %trunc11 = trunc i16 %x11 to i8
  %trunc12 = trunc i16 %x12 to i8
  %trunc13 = trunc i16 %x13 to i8
  %trunc14 = trunc i16 %x14 to i8
  %trunc15 = trunc i16 %x15 to i8
  %ext0 = zext i8 %trunc0 to i16
  %ext1 = zext i8 %trunc1 to i16
  %ext2 = zext i8 %trunc2 to i16
  %ext3 = zext i8 %trunc3 to i16
  %ext4 = zext i8 %trunc4 to i16
  %ext5 = zext i8 %trunc5 to i16
  %ext6 = zext i8 %trunc6 to i16
  %ext7 = zext i8 %trunc7 to i16
  %ext8 = zext i8 %trunc8 to i16
  %ext9 = zext i8 %trunc9 to i16
  %ext10 = zext i8 %trunc10 to i16
  %ext11 = zext i8 %trunc11 to i16
  %ext12 = zext i8 %trunc12 to i16
  %ext13 = zext i8 %trunc13 to i16
  %ext14 = zext i8 %trunc14 to i16
  %ext15 = zext i8 %trunc15 to i16
  %v0 = insertelement <16 x i16> undef, i16 %ext0, i32 0
  %v1 = insertelement <16 x i16> %v0, i16 %ext1, i32 1
  %v2 = insertelement <16 x i16> %v1, i16 %ext2, i32 2
  %v3 = insertelement <16 x i16> %v2, i16 %ext3, i32 3
  %v4 = insertelement <16 x i16> %v3, i16 %ext4, i32 4
  %v5 = insertelement <16 x i16> %v4, i16 %ext5, i32 5
  %v6 = insertelement <16 x i16> %v5, i16 %ext6, i32 6
  %v7 = insertelement <16 x i16> %v6, i16 %ext7, i32 7
  %v8 = insertelement <16 x i16> %v7, i16 %ext8, i32 8
  %v9 = insertelement <16 x i16> %v8, i16 %ext9, i32 9
  %v10 = insertelement <16 x i16> %v9, i16 %ext10, i32 10
  %v11 = insertelement <16 x i16> %v10, i16 %ext11, i32 11
  %v12 = insertelement <16 x i16> %v11, i16 %ext12, i32 12
  %v13 = insertelement <16 x i16> %v12, i16 %ext13, i32 13
  %v14 = insertelement <16 x i16> %v13, i16 %ext14, i32 14
  %v15 = insertelement <16 x i16> %v14, i16 %ext15, i32 15
  ret <16 x i16> %v15
}

define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8a:
; SSE: # %bb.0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi8a:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x0 = extractelement <16 x i8> %0, i32 0
  %x1 = extractelement <16 x i8> %0, i32 1
  %x2 = extractelement <16 x i8> %0, i32 2
  %x3 = extractelement <16 x i8> %0, i32 3
  %x4 = extractelement <16 x i8> %0, i32 4
  %x5 = extractelement <16 x i8> %0, i32 5
  %x6 = extractelement <16 x i8> %0, i32 6
  %x7 = extractelement <16 x i8> %0, i32 7
  %x8 = extractelement <16 x i8> %0, i32 8
  %x9 = extractelement <16 x i8> %0, i32 9
  %x10 = extractelement <16 x i8> %0, i32 10
  %x11 = extractelement <16 x i8> %0, i32 11
  %x12 = extractelement <16 x i8> %0, i32 12
  %x13 = extractelement <16 x i8> %0, i32 13
  %x14 = extractelement <16 x i8> %0, i32 14
  %x15 = extractelement <16 x i8> %0, i32 15
  %trunc0 = trunc i8 %x0 to i4
  %trunc1 = trunc i8 %x1 to i4
  %trunc2 = trunc i8 %x2 to i4
  %trunc3 = trunc i8 %x3 to i4
  %trunc4 = trunc i8 %x4 to i4
  %trunc5 = trunc i8 %x5 to i4
  %trunc6 = trunc i8 %x6 to i4
  %trunc7 = trunc i8 %x7 to i4
  %trunc8 = trunc i8 %x8 to i4
  %trunc9 = trunc i8 %x9 to i4
  %trunc10 = trunc i8 %x10 to i4
  %trunc11 = trunc i8 %x11 to i4
  %trunc12 = trunc i8 %x12 to i4
  %trunc13 = trunc i8 %x13 to i4
  %trunc14 = trunc i8 %x14 to i4
  %trunc15 = trunc i8 %x15 to i4
  %ext0 = zext i4 %trunc0 to i8
  %ext1 = zext i4 %trunc1 to i8
  %ext2 = zext i4 %trunc2 to i8
  %ext3 = zext i4 %trunc3 to i8
  %ext4 = zext i4 %trunc4 to i8
  %ext5 = zext i4 %trunc5 to i8
  %ext6 = zext i4 %trunc6 to i8
  %ext7 = zext i4 %trunc7 to i8
  %ext8 = zext i4 %trunc8 to i8
  %ext9 = zext i4 %trunc9 to i8
  %ext10 = zext i4 %trunc10 to i8
  %ext11 = zext i4 %trunc11 to i8
  %ext12 = zext i4 %trunc12 to i8
  %ext13 = zext i4 %trunc13 to i8
  %ext14 = zext i4 %trunc14 to i8
  %ext15 = zext i4 %trunc15 to i8
  %v0 = insertelement <16 x i8> undef, i8 %ext0, i32 0
  %v1 = insertelement <16 x i8> %v0, i8 %ext1, i32 1
  %v2 = insertelement <16 x i8> %v1, i8 %ext2, i32 2
  %v3 = insertelement <16 x i8> %v2, i8 %ext3, i32 3
  %v4 = insertelement <16 x i8> %v3, i8 %ext4, i32 4
  %v5 = insertelement <16 x i8> %v4, i8 %ext5, i32 5
  %v6 = insertelement <16 x i8> %v5, i8 %ext6, i32 6
  %v7 = insertelement <16 x i8> %v6, i8 %ext7, i32 7
  %v8 = insertelement <16 x i8> %v7, i8 %ext8, i32 8
  %v9 = insertelement <16 x i8> %v8, i8 %ext9, i32 9
  %v10 = insertelement <16 x i8> %v9, i8 %ext10, i32 10
  %v11 = insertelement <16 x i8> %v10, i8 %ext11, i32 11
  %v12 = insertelement <16 x i8> %v11, i8 %ext12, i32 12
  %v13 = insertelement <16 x i8> %v12, i8 %ext13, i32 13
  %v14 = insertelement <16 x i8> %v13, i8 %ext14, i32 14
  %v15 = insertelement <16 x i8> %v14, i8 %ext15, i32 15
  ret <16 x i8> %v15
}

define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8a:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper32xi8a:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
  %x0 = extractelement <32 x i8> %0, i32 0
  %x1 = extractelement <32 x i8> %0, i32 1
  %x2 = extractelement <32 x i8> %0, i32 2
  %x3 = extractelement <32 x i8> %0, i32 3
  %x4 = extractelement <32 x i8> %0, i32 4
  %x5 = extractelement <32 x i8> %0, i32 5
  %x6 = extractelement <32 x i8> %0, i32 6
  %x7 = extractelement <32 x i8> %0, i32 7
  %x8 = extractelement <32 x i8> %0, i32 8
  %x9 = extractelement <32 x i8> %0, i32 9
  %x10 = extractelement <32 x i8> %0, i32 10
  %x11 = extractelement <32 x i8> %0, i32 11
  %x12 = extractelement <32 x i8> %0, i32 12
  %x13 = extractelement <32 x i8> %0, i32 13
  %x14 = extractelement <32 x i8> %0, i32 14
  %x15 = extractelement <32 x i8> %0, i32 15
  %x16 = extractelement <32 x i8> %0, i32 16
  %x17 = extractelement <32 x i8> %0, i32 17
  %x18 = extractelement <32 x i8> %0, i32 18
  %x19 = extractelement <32 x i8> %0, i32 19
  %x20 = extractelement <32 x i8> %0, i32 20
  %x21 = extractelement <32 x i8> %0, i32 21
  %x22 = extractelement <32 x i8> %0, i32 22
  %x23 = extractelement <32 x i8> %0, i32 23
  %x24 = extractelement <32 x i8> %0, i32 24
  %x25 = extractelement <32 x i8> %0, i32 25
  %x26 = extractelement <32 x i8> %0, i32 26
  %x27 = extractelement <32 x i8> %0, i32 27
  %x28 = extractelement <32 x i8> %0, i32 28
  %x29 = extractelement <32 x i8> %0, i32 29
  %x30 = extractelement <32 x i8> %0, i32 30
  %x31 = extractelement <32 x i8> %0, i32 31
  %trunc0 = trunc i8 %x0 to i4
  %trunc1 = trunc i8 %x1 to i4
  %trunc2 = trunc i8 %x2 to i4
  %trunc3 = trunc i8 %x3 to i4
  %trunc4 = trunc i8 %x4 to i4
  %trunc5 = trunc i8 %x5 to i4
  %trunc6 = trunc i8 %x6 to i4
  %trunc7 = trunc i8 %x7 to i4
  %trunc8 = trunc i8 %x8 to i4
  %trunc9 = trunc i8 %x9 to i4
  %trunc10 = trunc i8 %x10 to i4
  %trunc11 = trunc i8 %x11 to i4
  %trunc12 = trunc i8 %x12 to i4
  %trunc13 = trunc i8 %x13 to i4
  %trunc14 = trunc i8 %x14 to i4
  %trunc15 = trunc i8 %x15 to i4
  %trunc16 = trunc i8 %x16 to i4
  %trunc17 = trunc i8 %x17 to i4
  %trunc18 = trunc i8 %x18 to i4
  %trunc19 = trunc i8 %x19 to i4
  %trunc20 = trunc i8 %x20 to i4
  %trunc21 = trunc i8 %x21 to i4
  %trunc22 = trunc i8 %x22 to i4
  %trunc23 = trunc i8 %x23 to i4
  %trunc24 = trunc i8 %x24 to i4
  %trunc25 = trunc i8 %x25 to i4
  %trunc26 = trunc i8 %x26 to i4
  %trunc27 = trunc i8 %x27 to i4
  %trunc28 = trunc i8 %x28 to i4
  %trunc29 = trunc i8 %x29 to i4
  %trunc30 = trunc i8 %x30 to i4
  %trunc31 = trunc i8 %x31 to i4
  %ext0 = zext i4 %trunc0 to i8
  %ext1 = zext i4 %trunc1 to i8
  %ext2 = zext i4 %trunc2 to i8
  %ext3 = zext i4 %trunc3 to i8
  %ext4 = zext i4 %trunc4 to i8
  %ext5 = zext i4 %trunc5 to i8
  %ext6 = zext i4 %trunc6 to i8
  %ext7 = zext i4 %trunc7 to i8
  %ext8 = zext i4 %trunc8 to i8
  %ext9 = zext i4 %trunc9 to i8
  %ext10 = zext i4 %trunc10 to i8
  %ext11 = zext i4 %trunc11 to i8
  %ext12 = zext i4 %trunc12 to i8
  %ext13 = zext i4 %trunc13 to i8
  %ext14 = zext i4 %trunc14 to i8
  %ext15 = zext i4 %trunc15 to i8
  %ext16 = zext i4 %trunc16 to i8
  %ext17 = zext i4 %trunc17 to i8
  %ext18 = zext i4 %trunc18 to i8
  %ext19 = zext i4 %trunc19 to i8
  %ext20 = zext i4 %trunc20 to i8
  %ext21 = zext i4 %trunc21 to i8
  %ext22 = zext i4 %trunc22 to i8
  %ext23 = zext i4 %trunc23 to i8
  %ext24 = zext i4 %trunc24 to i8
  %ext25 = zext i4 %trunc25 to i8
  %ext26 = zext i4 %trunc26 to i8
  %ext27 = zext i4 %trunc27 to i8
  %ext28 = zext i4 %trunc28 to i8
  %ext29 = zext i4 %trunc29 to i8
  %ext30 = zext i4 %trunc30 to i8
  %ext31 = zext i4 %trunc31 to i8
  %v0 = insertelement <32 x i8> undef, i8 %ext0, i32 0
  %v1 = insertelement <32 x i8> %v0, i8 %ext1, i32 1
  %v2 = insertelement <32 x i8> %v1, i8 %ext2, i32 2
  %v3 = insertelement <32 x i8> %v2, i8 %ext3, i32 3
  %v4 = insertelement <32 x i8> %v3, i8 %ext4, i32 4
  %v5 = insertelement <32 x i8> %v4, i8 %ext5, i32 5
  %v6 = insertelement <32 x i8> %v5, i8 %ext6, i32 6
  %v7 = insertelement <32 x i8> %v6, i8 %ext7, i32 7
  %v8 = insertelement <32 x i8> %v7, i8 %ext8, i32 8
  %v9 = insertelement <32 x i8> %v8, i8 %ext9, i32 9
  %v10 = insertelement <32 x i8> %v9, i8 %ext10, i32 10
  %v11 = insertelement <32 x i8> %v10, i8 %ext11, i32 11
  %v12 = insertelement <32 x i8> %v11, i8 %ext12, i32 12
  %v13 = insertelement <32 x i8> %v12, i8 %ext13, i32 13
  %v14 = insertelement <32 x i8> %v13, i8 %ext14, i32 14
  %v15 = insertelement <32 x i8> %v14, i8 %ext15, i32 15
  %v16 = insertelement <32 x i8> %v15, i8 %ext16, i32 16
  %v17 = insertelement <32 x i8> %v16, i8 %ext17, i32 17
  %v18 = insertelement <32 x i8> %v17, i8 %ext18, i32 18
  %v19 = insertelement <32 x i8> %v18, i8 %ext19, i32 19
  %v20 = insertelement <32 x i8> %v19, i8 %ext20, i32 20
  %v21 = insertelement <32 x i8> %v20, i8 %ext21, i32 21
  %v22 = insertelement <32 x i8> %v21, i8 %ext22, i32 22
  %v23 = insertelement <32 x i8> %v22, i8 %ext23, i32 23
  %v24 = insertelement <32 x i8> %v23, i8 %ext24, i32 24
  %v25 = insertelement <32 x i8> %v24, i8 %ext25, i32 25
  %v26 = insertelement <32 x i8> %v25, i8 %ext26, i32 26
  %v27 = insertelement <32 x i8> %v26, i8 %ext27, i32 27
  %v28 = insertelement <32 x i8> %v27, i8 %ext28, i32 28
  %v29 = insertelement <32 x i8> %v28, i8 %ext29, i32 29
  %v30 = insertelement <32 x i8> %v29, i8 %ext30, i32 30
  %v31 = insertelement <32 x i8> %v30, i8 %ext31, i32 31
  ret <32 x i8> %v31
}

define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind {
; SSE2-LABEL: _clearupper2xi64b:
; SSE2: # %bb.0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper2xi64b:
; SSE42: # %bb.0:
; SSE42-NEXT: xorps %xmm1, %xmm1
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper2xi64b:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT: retq
  %x32 = bitcast <2 x i64> %0 to <4 x i32>
  %r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <4 x i32> %r0, i32 zeroinitializer, i32 3
  %r = bitcast <4 x i32> %r1 to <2 x i64>
  ret <2 x i64> %r
}

define <4 x i64> @_clearupper4xi64b(<4 x i64>) nounwind {
; SSE2-LABEL: _clearupper4xi64b:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [NaN,0.0E+0,NaN,0.0E+0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper4xi64b:
; SSE42: # %bb.0:
; SSE42-NEXT: xorps %xmm2, %xmm2
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper4xi64b:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX-NEXT: retq
  %x32 = bitcast <4 x i64> %0 to <8 x i32>
  %r0 = insertelement <8 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <8 x i32> %r0, i32 zeroinitializer, i32 3
  %r2 = insertelement <8 x i32> %r1, i32 zeroinitializer, i32 5
  %r3 = insertelement <8 x i32> %r2, i32 zeroinitializer, i32 7
  %r = bitcast <8 x i32> %r3 to <4 x i64>
  ret <4 x i64> %r
}

define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
; SSE2-LABEL: _clearupper4xi32b:
; SSE2: # %bb.0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper4xi32b:
; SSE42: # %bb.0:
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper4xi32b:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT: retq
  %x16 = bitcast <4 x i32> %0 to <8 x i16>
  %r0 = insertelement <8 x i16> %x16, i16 zeroinitializer, i32 1
  %r1 = insertelement <8 x i16> %r0, i16 zeroinitializer, i32 3
  %r2 = insertelement <8 x i16> %r1, i16 zeroinitializer, i32 5
  %r3 = insertelement <8 x i16> %r2, i16 zeroinitializer, i32 7
  %r = bitcast <8 x i16> %r3 to <4 x i32>
  ret <4 x i32> %r
}

define <8 x i32> @_clearupper8xi32b(<8 x i32>) nounwind {
; SSE2-LABEL: _clearupper8xi32b:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper8xi32b:
; SSE42: # %bb.0:
; SSE42-NEXT: pxor %xmm2, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE42-NEXT: retq
;
; AVX1-LABEL: _clearupper8xi32b:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper8xi32b:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
  %x16 = bitcast <8 x i32> %0 to <16 x i16>
  %r0 = insertelement <16 x i16> %x16, i16 zeroinitializer, i32 1
  %r1 = insertelement <16 x i16> %r0, i16 zeroinitializer, i32 3
  %r2 = insertelement <16 x i16> %r1, i16 zeroinitializer, i32 5
  %r3 = insertelement <16 x i16> %r2, i16 zeroinitializer, i32 7
  %r4 = insertelement <16 x i16> %r3, i16 zeroinitializer, i32 9
  %r5 = insertelement <16 x i16> %r4, i16 zeroinitializer, i32 11
  %r6 = insertelement <16 x i16> %r5, i16 zeroinitializer, i32 13
  %r7 = insertelement <16 x i16> %r6, i16 zeroinitializer, i32 15
  %r = bitcast <16 x i16> %r7 to <8 x i32>
  ret <8 x i32> %r
}

define <8 x i16> @_clearupper8xi16b(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16b:
; SSE: # %bb.0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper8xi16b:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x8 = bitcast <8 x i16> %0 to <16 x i8>
  %r0 = insertelement <16 x i8> %x8, i8 zeroinitializer, i32 1
  %r1 = insertelement <16 x i8> %r0, i8 zeroinitializer, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 zeroinitializer, i32 5
  %r3 = insertelement <16 x i8> %r2, i8 zeroinitializer, i32 7
  %r4 = insertelement <16 x i8> %r3, i8 zeroinitializer, i32 9
  %r5 = insertelement <16 x i8> %r4, i8 zeroinitializer, i32 11
  %r6 = insertelement <16 x i8> %r5, i8 zeroinitializer, i32 13
  %r7 = insertelement <16 x i8> %r6, i8 zeroinitializer, i32 15
  %r = bitcast <16 x i8> %r7 to <8 x i16>
  ret <8 x i16> %r
}

define <16 x i16> @_clearupper16xi16b(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16b:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi16b:
; AVX: # %bb.0:
; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm2
; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX-NEXT: retq
  %x8 = bitcast <16 x i16> %0 to <32 x i8>
  %r0 = insertelement <32 x i8> %x8, i8 zeroinitializer, i32 1
  %r1 = insertelement <32 x i8> %r0, i8 zeroinitializer, i32 3
  %r2 = insertelement <32 x i8> %r1, i8 zeroinitializer, i32 5
  %r3 = insertelement <32 x i8> %r2, i8 zeroinitializer, i32 7
  %r4 = insertelement <32 x i8> %r3, i8 zeroinitializer, i32 9
  %r5 = insertelement <32 x i8> %r4, i8 zeroinitializer, i32 11
  %r6 = insertelement <32 x i8> %r5, i8 zeroinitializer, i32 13
  %r7 = insertelement <32 x i8> %r6, i8 zeroinitializer, i32 15
  %r8 = insertelement <32 x i8> %r7, i8 zeroinitializer, i32 17
  %r9 = insertelement <32 x i8> %r8, i8 zeroinitializer, i32 19
  %r10 = insertelement <32 x i8> %r9, i8 zeroinitializer, i32 21
  %r11 = insertelement <32 x i8> %r10, i8 zeroinitializer, i32 23
  %r12 = insertelement <32 x i8> %r11, i8 zeroinitializer, i32 25
  %r13 = insertelement <32 x i8> %r12, i8 zeroinitializer, i32 27
  %r14 = insertelement <32 x i8> %r13, i8 zeroinitializer, i32 29
  %r15 = insertelement <32 x i8> %r14, i8 zeroinitializer, i32 31
  %r = bitcast <32 x i8> %r15 to <16 x i16>
  ret <16 x i16> %r
}

define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
; SSE2-LABEL: _clearupper16xi8b:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rbx
; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE2-NEXT: movq %xmm1, %r10
; SSE2-NEXT: movq %r10, %r8
; SSE2-NEXT: shrq $56, %r8
; SSE2-NEXT: andl $15, %r8d
; SSE2-NEXT: movq %r10, %r9
; SSE2-NEXT: shrq $48, %r9
; SSE2-NEXT: andl $15, %r9d
; SSE2-NEXT: movq %r10, %rsi
; SSE2-NEXT: shrq $40, %rsi
; SSE2-NEXT: andl $15, %esi
; SSE2-NEXT: movq %r10, %r11
; SSE2-NEXT: shrq $32, %r11
; SSE2-NEXT: andl $15, %r11d
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: movq %rax, %rdx
; SSE2-NEXT: shrq $56, %rdx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq $48, %rcx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movq %rax, %rdi
; SSE2-NEXT: shrq $40, %rdi
; SSE2-NEXT: andl $15, %edi
; SSE2-NEXT: movq %rax, %rbx
; SSE2-NEXT: shrq $32, %rbx
; SSE2-NEXT: andl $15, %ebx
; SSE2-NEXT: shlq $32, %rbx
; SSE2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; SSE2-NEXT: orq %rbx, %rax
; SSE2-NEXT: shlq $40, %rdi
; SSE2-NEXT: orq %rax, %rdi
; SSE2-NEXT: shlq $48, %rcx
; SSE2-NEXT: orq %rdi, %rcx
; SSE2-NEXT: shlq $56, %rdx
; SSE2-NEXT: orq %rcx, %rdx
; SSE2-NEXT: shlq $32, %r11
; SSE2-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F
; SSE2-NEXT: orq %r11, %r10
; SSE2-NEXT: shlq $40, %rsi
; SSE2-NEXT: orq %r10, %rsi
; SSE2-NEXT: shlq $48, %r9
; SSE2-NEXT: orq %rsi, %r9
; SSE2-NEXT: shlq $56, %r8
; SSE2-NEXT: orq %r9, %r8
; SSE2-NEXT: movq %rdx, %xmm0
; SSE2-NEXT: movq %r8, %xmm1
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT: popq %rbx
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper16xi8b:
; SSE42: # %bb.0:
; SSE42-NEXT: pushq %rbx
; SSE42-NEXT: pextrq $1, %xmm0, %r10
; SSE42-NEXT: movq %r10, %r8
; SSE42-NEXT: shrq $56, %r8
; SSE42-NEXT: andl $15, %r8d
; SSE42-NEXT: movq %r10, %r9
; SSE42-NEXT: shrq $48, %r9
; SSE42-NEXT: andl $15, %r9d
; SSE42-NEXT: movq %r10, %rsi
; SSE42-NEXT: shrq $40, %rsi
; SSE42-NEXT: andl $15, %esi
; SSE42-NEXT: movq %r10, %r11
; SSE42-NEXT: shrq $32, %r11
; SSE42-NEXT: andl $15, %r11d
; SSE42-NEXT: movq %xmm0, %rax
; SSE42-NEXT: movq %rax, %rdx
; SSE42-NEXT: shrq $56, %rdx
; SSE42-NEXT: andl $15, %edx
; SSE42-NEXT: movq %rax, %rcx
; SSE42-NEXT: shrq $48, %rcx
; SSE42-NEXT: andl $15, %ecx
; SSE42-NEXT: movq %rax, %rdi
; SSE42-NEXT: shrq $40, %rdi
; SSE42-NEXT: andl $15, %edi
; SSE42-NEXT: movq %rax, %rbx
; SSE42-NEXT: shrq $32, %rbx
; SSE42-NEXT: andl $15, %ebx
; SSE42-NEXT: shlq $32, %rbx
; SSE42-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; SSE42-NEXT: orq %rbx, %rax
; SSE42-NEXT: shlq $40, %rdi
; SSE42-NEXT: orq %rax, %rdi
; SSE42-NEXT: shlq $48, %rcx
; SSE42-NEXT: orq %rdi, %rcx
; SSE42-NEXT: shlq $56, %rdx
; SSE42-NEXT: orq %rcx, %rdx
; SSE42-NEXT: shlq $32, %r11
; SSE42-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F
; SSE42-NEXT: orq %r11, %r10
; SSE42-NEXT: shlq $40, %rsi
; SSE42-NEXT: orq %r10, %rsi
; SSE42-NEXT: shlq $48, %r9
; SSE42-NEXT: orq %rsi, %r9
; SSE42-NEXT: shlq $56, %r8
; SSE42-NEXT: orq %r9, %r8
; SSE42-NEXT: movq %r8, %xmm1
; SSE42-NEXT: movq %rdx, %xmm0
; SSE42-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE42-NEXT: popq %rbx
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper16xi8b:
; AVX: # %bb.0:
; AVX-NEXT: pushq %rbx
; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %r9
; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rcx
; AVX-NEXT: movq %r9, %r8
; AVX-NEXT: shrq $56, %r8
; AVX-NEXT: andl $15, %r8d
; AVX-NEXT: movq %r9, %r10
; AVX-NEXT: shrq $48, %r10
; AVX-NEXT: andl $15, %r10d
; AVX-NEXT: movq %rcx, %rdx
; AVX-NEXT: shldq $24, %r9, %rdx
; AVX-NEXT: andl $15, %edx
; AVX-NEXT: movq %r9, %r11
; AVX-NEXT: shrq $32, %r11
; AVX-NEXT: andl $15, %r11d
; AVX-NEXT: movq %rcx, %rdi
; AVX-NEXT: shrq $56, %rdi
; AVX-NEXT: andl $15, %edi
; AVX-NEXT: movq %rcx, %rsi
; AVX-NEXT: shrq $48, %rsi
; AVX-NEXT: andl $15, %esi
; AVX-NEXT: movq %rcx, %rax
; AVX-NEXT: shrq $40, %rax
; AVX-NEXT: andl $15, %eax
; AVX-NEXT: movq %rcx, %rbx
; AVX-NEXT: shrq $32, %rbx
; AVX-NEXT: andl $15, %ebx
; AVX-NEXT: shlq $32, %rbx
; AVX-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F
; AVX-NEXT: orq %rbx, %rcx
; AVX-NEXT: shlq $40, %rax
; AVX-NEXT: orq %rcx, %rax
; AVX-NEXT: shlq $48, %rsi
; AVX-NEXT: orq %rax, %rsi
; AVX-NEXT: shlq $56, %rdi
; AVX-NEXT: orq %rsi, %rdi
; AVX-NEXT: movq %rdi, -{{[0-9]+}}(%rsp)
; AVX-NEXT: shlq $32, %r11
; AVX-NEXT: andl $252645135, %r9d # imm = 0xF0F0F0F
; AVX-NEXT: orq %r11, %r9
; AVX-NEXT: shlq $40, %rdx
; AVX-NEXT: orq %r9, %rdx
; AVX-NEXT: shlq $48, %r10
; AVX-NEXT: orq %rdx, %r10
; AVX-NEXT: shlq $56, %r8
; AVX-NEXT: orq %r10, %r8
; AVX-NEXT: movq %r8, -{{[0-9]+}}(%rsp)
; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0
; AVX-NEXT: popq %rbx
; AVX-NEXT: retq
  %x4 = bitcast <16 x i8> %0 to <32 x i4>
  %r0 = insertelement <32 x i4> %x4, i4 zeroinitializer, i32 1
  %r1 = insertelement <32 x i4> %r0, i4 zeroinitializer, i32 3
  %r2 = insertelement <32 x i4> %r1, i4 zeroinitializer, i32 5
  %r3 = insertelement <32 x i4> %r2, i4 zeroinitializer, i32 7
  %r4 = insertelement <32 x i4> %r3, i4 zeroinitializer, i32 9
  %r5 = insertelement <32 x i4> %r4, i4 zeroinitializer, i32 11
  %r6 = insertelement <32 x i4> %r5, i4 zeroinitializer, i32 13
  %r7 = insertelement <32 x i4> %r6, i4 zeroinitializer, i32 15
  %r8 = insertelement <32 x i4> %r7, i4 zeroinitializer, i32 17
  %r9 = insertelement <32 x i4> %r8, i4 zeroinitializer, i32 19
  %r10 = insertelement <32 x i4> %r9, i4 zeroinitializer, i32 21
  %r11 = insertelement <32 x i4> %r10, i4 zeroinitializer, i32 23
  %r12 = insertelement <32 x i4> %r11, i4 zeroinitializer, i32 25
  %r13 = insertelement <32 x i4> %r12, i4 zeroinitializer, i32 27
  %r14 = insertelement <32 x i4> %r13, i4 zeroinitializer, i32 29
  %r15 = insertelement <32 x i4> %r14, i4 zeroinitializer, i32 31
  %r = bitcast <32 x i4> %r15 to <16 x i8>
  ret <16 x i8> %r
}

define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind {
; SSE2-LABEL: _clearupper32xi8b:
; SSE2: # %bb.0:
; SSE2-NEXT: pushq %rbx
; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1]
; SSE2-NEXT: movq %xmm2, %r10
; SSE2-NEXT: movq %r10, %r8
; SSE2-NEXT: shrq $56, %r8
; SSE2-NEXT: andl $15, %r8d
; SSE2-NEXT: movq %r10, %r9
; SSE2-NEXT: shrq $48, %r9
; SSE2-NEXT: andl $15, %r9d
; SSE2-NEXT: movq %r10, %rsi
; SSE2-NEXT: shrq $40, %rsi
; SSE2-NEXT: andl $15, %esi
; SSE2-NEXT: movq %r10, %r11
; SSE2-NEXT: shrq $32, %r11
; SSE2-NEXT: andl $15, %r11d
; SSE2-NEXT: movq %xmm0, %rax
; SSE2-NEXT: movq %rax, %rdx
; SSE2-NEXT: shrq $56, %rdx
; SSE2-NEXT: andl $15, %edx
; SSE2-NEXT: movq %rax, %rcx
; SSE2-NEXT: shrq $48, %rcx
; SSE2-NEXT: andl $15, %ecx
; SSE2-NEXT: movq %rax, %rdi
; SSE2-NEXT: shrq $40, %rdi
; SSE2-NEXT: andl $15, %edi
; SSE2-NEXT: movq %rax, %rbx
; SSE2-NEXT: shrq $32, %rbx
; SSE2-NEXT: andl $15, %ebx
; SSE2-NEXT: shlq $32, %rbx
; SSE2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; SSE2-NEXT: orq %rbx, %rax
; SSE2-NEXT: shlq $40, %rdi
; SSE2-NEXT: orq %rax, %rdi
; SSE2-NEXT: shlq $48, %rcx
; SSE2-NEXT: orq %rdi, %rcx
; SSE2-NEXT: shlq $56, %rdx
; SSE2-NEXT: orq %rcx, %rdx
; SSE2-NEXT: shlq $32, %r11
; SSE2-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F
; SSE2-NEXT: orq %r11, %r10
; SSE2-NEXT: shlq $40, %rsi
; SSE2-NEXT: orq %r10, %rsi
; SSE2-NEXT: shlq $48, %r9
; SSE2-NEXT: orq %rsi, %r9
; SSE2-NEXT: shlq $56, %r8
; SSE2-NEXT: orq %r9, %r8
; SSE2-NEXT: movq %rdx, %xmm0
; SSE2-NEXT: movq %r8, %xmm2
; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE2-NEXT: popq %rbx
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper32xi8b:
; SSE42: # %bb.0:
; SSE42-NEXT: pushq %rbx
; SSE42-NEXT: pextrq $1, %xmm0, %r10
; SSE42-NEXT: movq %r10, %r8
; SSE42-NEXT: shrq $56, %r8
; SSE42-NEXT: andl $15, %r8d
; SSE42-NEXT: movq %r10, %r9
; SSE42-NEXT: shrq $48, %r9
; SSE42-NEXT: andl $15, %r9d
; SSE42-NEXT: movq %r10, %rsi
; SSE42-NEXT: shrq $40, %rsi
; SSE42-NEXT: andl $15, %esi
; SSE42-NEXT: movq %r10, %r11
; SSE42-NEXT: shrq $32, %r11
; SSE42-NEXT: andl $15, %r11d
; SSE42-NEXT: movq %xmm0, %rax
; SSE42-NEXT: movq %rax, %rdx
; SSE42-NEXT: shrq $56, %rdx
; SSE42-NEXT: andl $15, %edx
; SSE42-NEXT: movq %rax, %rcx
; SSE42-NEXT: shrq $48, %rcx
; SSE42-NEXT: andl $15, %ecx
; SSE42-NEXT: movq %rax, %rdi
; SSE42-NEXT: shrq $40, %rdi
; SSE42-NEXT: andl $15, %edi
; SSE42-NEXT: movq %rax, %rbx
; SSE42-NEXT: shrq $32, %rbx
; SSE42-NEXT: andl $15, %ebx
; SSE42-NEXT: shlq $32, %rbx
; SSE42-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; SSE42-NEXT: orq %rbx, %rax
; SSE42-NEXT: shlq $40, %rdi
; SSE42-NEXT: orq %rax, %rdi
; SSE42-NEXT: shlq $48, %rcx
; SSE42-NEXT: orq %rdi, %rcx
; SSE42-NEXT: shlq $56, %rdx
; SSE42-NEXT: orq %rcx, %rdx
; SSE42-NEXT: shlq $32, %r11
; SSE42-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F
; SSE42-NEXT: orq %r11, %r10
; SSE42-NEXT: shlq $40, %rsi
; SSE42-NEXT: orq %r10, %rsi
; SSE42-NEXT: shlq $48, %r9
; SSE42-NEXT: orq %rsi, %r9
; SSE42-NEXT: shlq $56, %r8
; SSE42-NEXT: orq %r9, %r8
; SSE42-NEXT: movq %r8, %xmm2
; SSE42-NEXT: movq %rdx, %xmm0
; SSE42-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0]
; SSE42-NEXT: popq %rbx
; SSE42-NEXT: retq
;
; AVX1-LABEL: _clearupper32xi8b:
; AVX1: # %bb.0:
; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX1-NEXT: movq %rax, %r8
; AVX1-NEXT: movq %rax, %rdx
; AVX1-NEXT: movq %rax, %rsi
; AVX1-NEXT: movq %rax, %rdi
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $32, %rcx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: shlq $32, %rcx
; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; AVX1-NEXT: orq %rcx, %rax
; AVX1-NEXT: shrq $40, %rdi
; AVX1-NEXT: andl $15, %edi
; AVX1-NEXT: shlq $40, %rdi
; AVX1-NEXT: orq %rax, %rdi
; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX1-NEXT: shrq $48, %rsi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: shlq $48, %rsi
; AVX1-NEXT: orq %rdi, %rsi
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $56, %rdx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shlq $56, %rdx
; AVX1-NEXT: orq %rsi, %rdx
; AVX1-NEXT: movq %rax, %rsi
; AVX1-NEXT: shldq $24, %rax, %r8
; AVX1-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movq %rax, %rdx
; AVX1-NEXT: shrq $32, %rdx
; AVX1-NEXT: andl $15, %edx
; AVX1-NEXT: shlq $32, %rdx
; AVX1-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; AVX1-NEXT: orq %rdx, %rax
; AVX1-NEXT: andl $15, %r8d
; AVX1-NEXT: shlq $40, %r8
; AVX1-NEXT: orq %rax, %r8
; AVX1-NEXT: shrq $48, %rsi
; AVX1-NEXT: andl $15, %esi
; AVX1-NEXT: shlq $48, %rsi
; AVX1-NEXT: orq %r8, %rsi
; AVX1-NEXT: shrq $56, %rcx
; AVX1-NEXT: andl $15, %ecx
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: shlq $56, %rcx
; AVX1-NEXT: orq %rsi, %rcx
; AVX1-NEXT: vmovq %xmm0, %rax
; AVX1-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $8, %ecx
; AVX1-NEXT: vmovd %eax, %xmm1
; AVX1-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $16, %ecx
; AVX1-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movl %eax, %ecx
; AVX1-NEXT: shrl $24, %ecx
; AVX1-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $32, %rcx
; AVX1-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $40, %rcx
; AVX1-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
; AVX1-NEXT: movq %rax, %rcx
; AVX1-NEXT: shrq $48, %rcx
; AVX1-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
; AVX1-NEXT: vpextrq $1, %xmm0, %rcx
; AVX1-NEXT: shrq $56, %rax
; AVX1-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $8, %eax
; AVX1-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; AVX1-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $16, %eax
; AVX1-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX1-NEXT: movl %ecx, %eax
; AVX1-NEXT: shrl $24, %eax
; AVX1-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: shrq $32, %rax
; AVX1-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: shrq $40, %rax
; AVX1-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX1-NEXT: movq %rcx, %rax
; AVX1-NEXT: shrq $48, %rax
; AVX1-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX1-NEXT: shrq $56, %rcx
; AVX1-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
; AVX1-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm1
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper32xi8b:
; AVX2: # %bb.0:
; AVX2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX2-NEXT: movq %rax, %r8
; AVX2-NEXT: movq %rax, %rdx
; AVX2-NEXT: movq %rax, %rsi
; AVX2-NEXT: movq %rax, %rdi
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $32, %rcx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: shlq $32, %rcx
; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; AVX2-NEXT: orq %rcx, %rax
; AVX2-NEXT: shrq $40, %rdi
; AVX2-NEXT: andl $15, %edi
; AVX2-NEXT: shlq $40, %rdi
; AVX2-NEXT: orq %rax, %rdi
; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rax
; AVX2-NEXT: shrq $48, %rsi
; AVX2-NEXT: andl $15, %esi
; AVX2-NEXT: shlq $48, %rsi
; AVX2-NEXT: orq %rdi, %rsi
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $56, %rdx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $56, %rdx
; AVX2-NEXT: orq %rsi, %rdx
; AVX2-NEXT: movq %rax, %rsi
; AVX2-NEXT: shldq $24, %rax, %r8
; AVX2-NEXT: movq %rdx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movq %rax, %rdx
; AVX2-NEXT: shrq $32, %rdx
; AVX2-NEXT: andl $15, %edx
; AVX2-NEXT: shlq $32, %rdx
; AVX2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F
; AVX2-NEXT: orq %rdx, %rax
; AVX2-NEXT: andl $15, %r8d
; AVX2-NEXT: shlq $40, %r8
; AVX2-NEXT: orq %rax, %r8
; AVX2-NEXT: shrq $48, %rsi
; AVX2-NEXT: andl $15, %esi
; AVX2-NEXT: shlq $48, %rsi
; AVX2-NEXT: orq %r8, %rsi
; AVX2-NEXT: shrq $56, %rcx
; AVX2-NEXT: andl $15, %ecx
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: shlq $56, %rcx
; AVX2-NEXT: orq %rsi, %rcx
; AVX2-NEXT: vmovq %xmm0, %rax
; AVX2-NEXT: movq %rcx, -{{[0-9]+}}(%rsp)
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $8, %ecx
; AVX2-NEXT: vmovd %eax, %xmm1
; AVX2-NEXT: vpinsrb $1, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $16, %ecx
; AVX2-NEXT: vpinsrb $2, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movl %eax, %ecx
; AVX2-NEXT: shrl $24, %ecx
; AVX2-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $32, %rcx
; AVX2-NEXT: vpinsrb $4, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $40, %rcx
; AVX2-NEXT: vpinsrb $5, %ecx, %xmm1, %xmm1
; AVX2-NEXT: movq %rax, %rcx
; AVX2-NEXT: shrq $48, %rcx
; AVX2-NEXT: vpinsrb $6, %ecx, %xmm1, %xmm1
; AVX2-NEXT: vpextrq $1, %xmm0, %rcx
; AVX2-NEXT: shrq $56, %rax
; AVX2-NEXT: vpinsrb $7, %eax, %xmm1, %xmm0
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $8, %eax
; AVX2-NEXT: vpinsrb $8, %ecx, %xmm0, %xmm0
; AVX2-NEXT: vpinsrb $9, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $16, %eax
; AVX2-NEXT: vpinsrb $10, %eax, %xmm0, %xmm0
; AVX2-NEXT: movl %ecx, %eax
; AVX2-NEXT: shrl $24, %eax
; AVX2-NEXT: vpinsrb $11, %eax, %xmm0, %xmm0
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: shrq $32, %rax
; AVX2-NEXT: vpinsrb $12, %eax, %xmm0, %xmm0
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: shrq $40, %rax
; AVX2-NEXT: vpinsrb $13, %eax, %xmm0, %xmm0
; AVX2-NEXT: movq %rcx, %rax
; AVX2-NEXT: shrq $48, %rax
; AVX2-NEXT: vpinsrb $14, %eax, %xmm0, %xmm0
; AVX2-NEXT: shrq $56, %rcx
; AVX2-NEXT: vpinsrb $15, %ecx, %xmm0, %xmm0
; AVX2-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm1
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT: retq
  %x4 = bitcast <32 x i8> %0 to <64 x i4>
  %r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1
  %r1 = insertelement <64 x i4> %r0, i4 zeroinitializer, i32 3
  %r2 = insertelement <64 x i4> %r1, i4 zeroinitializer, i32 5
  %r3 = insertelement <64 x i4> %r2, i4 zeroinitializer, i32 7
  %r4 = insertelement <64 x i4> %r3, i4 zeroinitializer, i32 9
  %r5 = insertelement <64 x i4> %r4, i4 zeroinitializer, i32 11
  %r6 = insertelement <64 x i4> %r5, i4 zeroinitializer, i32 13
  %r7 = insertelement <64 x i4> %r6, i4 zeroinitializer, i32 15
  %r8 = insertelement <64 x i4> %r7, i4 zeroinitializer, i32 17
  %r9 = insertelement <64 x i4> %r8, i4 zeroinitializer, i32 19
  %r10 = insertelement <64 x i4> %r9, i4 zeroinitializer, i32 21
  %r11 = insertelement <64 x i4> %r10, i4 zeroinitializer, i32 23
  %r12 = insertelement <64 x i4> %r11, i4 zeroinitializer, i32 25
  %r13 = insertelement <64 x i4> %r12, i4 zeroinitializer, i32 27
  %r14 = insertelement <64 x i4> %r13, i4 zeroinitializer, i32 29
  %r15 = insertelement <64 x i4> %r14, i4 zeroinitializer, i32 31
  %r16 = insertelement <64 x i4> %r15, i4 zeroinitializer, i32 33
  %r17 = insertelement <64 x i4> %r16, i4 zeroinitializer, i32 35
  %r18 = insertelement <64 x i4> %r17, i4 zeroinitializer, i32 37
  %r19 = insertelement <64 x i4> %r18, i4 zeroinitializer, i32 39
  %r20 = insertelement <64 x i4> %r19, i4 zeroinitializer, i32 41
  %r21 = insertelement <64 x i4> %r20, i4 zeroinitializer, i32 43
  %r22 = insertelement <64 x i4> %r21, i4 zeroinitializer, i32 45
  %r23 = insertelement <64 x i4> %r22, i4 zeroinitializer, i32 47
  %r24 = insertelement <64 x i4> %r23, i4 zeroinitializer, i32 49
  %r25 = insertelement <64 x i4> %r24, i4 zeroinitializer, i32 51
  %r26 = insertelement <64 x i4> %r25, i4 zeroinitializer, i32 53
  %r27 = insertelement <64 x i4> %r26, i4 zeroinitializer, i32 55
  %r28 = insertelement <64 x i4> %r27, i4 zeroinitializer, i32 57
  %r29 = insertelement <64 x i4> %r28, i4 zeroinitializer, i32 59
  %r30 = insertelement <64 x i4> %r29, i4 zeroinitializer, i32 61
  %r31 = insertelement <64 x i4> %r30, i4 zeroinitializer, i32 63
  %r = bitcast <64 x i4> %r15 to <32 x i8>
  ret <32 x i8> %r
}

define <2 x i64> @_clearupper2xi64c(<2 x i64>) nounwind {
; SSE2-LABEL: _clearupper2xi64c:
; SSE2: # %bb.0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper2xi64c:
; SSE42: # %bb.0:
; SSE42-NEXT: xorps %xmm1, %xmm1
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper2xi64c:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT: retq
  %r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0
  ret <2 x i64> %r
}

define <4 x i64> @_clearupper4xi64c(<4 x i64>) nounwind {
; SSE2-LABEL: _clearupper4xi64c:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper4xi64c:
; SSE42: # %bb.0:
; SSE42-NEXT: xorps %xmm2, %xmm2
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper4xi64c:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX-NEXT: retq
  %r = and <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>, %0
  ret <4 x i64> %r
}

define <4 x i32> @_clearupper4xi32c(<4 x i32>) nounwind {
; SSE2-LABEL: _clearupper4xi32c:
; SSE2: # %bb.0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper4xi32c:
; SSE42: # %bb.0:
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper4xi32c:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT: retq
  %r = and <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, %0
  ret <4 x i32> %r
}

define <8 x i32> @_clearupper8xi32c(<8 x i32>) nounwind {
; SSE2-LABEL: _clearupper8xi32c:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper8xi32c:
; SSE42: # %bb.0:
; SSE42-NEXT: pxor %xmm2, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE42-NEXT: retq
;
; AVX1-LABEL: _clearupper8xi32c:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper8xi32c:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
  %r = and <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>, %0
  ret <8 x i32> %r
}

define <8 x i16> @_clearupper8xi16c(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16c:
; SSE: # %bb.0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper8xi16c:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %r = and <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
  ret <8 x i16> %r
}

define <16 x i16> @_clearupper16xi16c(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi16c:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
  %r = and <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
  ret <16 x i16> %r
}

define <16 x i8> @_clearupper16xi8c(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8c:
; SSE: # %bb.0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi8c:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %r = and <16 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
  ret <16 x i8> %r
}

define <32 x i8> @_clearupper32xi8c(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper32xi8c:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
  %r = and <32 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
  ret <32 x i8> %r
}