1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2 3; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefix=SSE --check-prefix=SSE42 4; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1 5; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2 6 7; 8; PR6455 'Clear Upper Bits' Patterns 9; 10 11define <2 x i64> @_clearupper2xi64a(<2 x i64>) nounwind { 12; SSE2-LABEL: _clearupper2xi64a: 13; SSE2: # %bb.0: 14; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 15; SSE2-NEXT: retq 16; 17; SSE42-LABEL: _clearupper2xi64a: 18; SSE42: # %bb.0: 19; SSE42-NEXT: xorps %xmm1, %xmm1 20; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 21; SSE42-NEXT: retq 22; 23; AVX-LABEL: _clearupper2xi64a: 24; AVX: # %bb.0: 25; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 26; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 27; AVX-NEXT: retq 28 %x0 = extractelement <2 x i64> %0, i32 0 29 %x1 = extractelement <2 x i64> %0, i32 1 30 %trunc0 = trunc i64 %x0 to i32 31 %trunc1 = trunc i64 %x1 to i32 32 %ext0 = zext i32 %trunc0 to i64 33 %ext1 = zext i32 %trunc1 to i64 34 %v0 = insertelement <2 x i64> undef, i64 %ext0, i32 0 35 %v1 = insertelement <2 x i64> %v0, i64 %ext1, i32 1 36 ret <2 x i64> %v1 37} 38 39define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind { 40; SSE2-LABEL: _clearupper4xi64a: 41; SSE2: # %bb.0: 42; SSE2-NEXT: movaps {{.*#+}} xmm2 = [4294967295,4294967295] 43; SSE2-NEXT: andps %xmm2, %xmm0 44; SSE2-NEXT: andps %xmm2, %xmm1 45; SSE2-NEXT: retq 46; 47; SSE42-LABEL: _clearupper4xi64a: 48; SSE42: # %bb.0: 49; SSE42-NEXT: xorps %xmm2, %xmm2 50; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] 51; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 52; SSE42-NEXT: retq 53; 54; AVX-LABEL: _clearupper4xi64a: 55; AVX: # %bb.0: 56; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 57; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 58; AVX-NEXT: retq 59 %x0 = extractelement <4 x i64> %0, i32 0 60 %x1 = extractelement <4 x i64> %0, i32 1 61 %x2 = extractelement <4 x i64> %0, i32 2 62 %x3 = extractelement <4 x i64> %0, i32 3 63 %trunc0 = trunc i64 %x0 to i32 64 %trunc1 = trunc i64 %x1 to i32 65 %trunc2 = trunc i64 %x2 to i32 66 %trunc3 = trunc i64 %x3 to i32 67 %ext0 = zext i32 %trunc0 to i64 68 %ext1 = zext i32 %trunc1 to i64 69 %ext2 = zext i32 %trunc2 to i64 70 %ext3 = zext i32 %trunc3 to i64 71 %v0 = insertelement <4 x i64> undef, i64 %ext0, i32 0 72 %v1 = insertelement <4 x i64> %v0, i64 %ext1, i32 1 73 %v2 = insertelement <4 x i64> %v1, i64 %ext2, i32 2 74 %v3 = insertelement <4 x i64> %v2, i64 %ext3, i32 3 75 ret <4 x i64> %v3 76} 77 78define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind { 79; SSE2-LABEL: _clearupper4xi32a: 80; SSE2: # %bb.0: 81; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 82; SSE2-NEXT: retq 83; 84; SSE42-LABEL: _clearupper4xi32a: 85; SSE42: # %bb.0: 86; SSE42-NEXT: pxor %xmm1, %xmm1 87; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 88; SSE42-NEXT: retq 89; 90; AVX-LABEL: _clearupper4xi32a: 91; AVX: # %bb.0: 92; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 93; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 94; AVX-NEXT: retq 
95 %x0 = extractelement <4 x i32> %0, i32 0 96 %x1 = extractelement <4 x i32> %0, i32 1 97 %x2 = extractelement <4 x i32> %0, i32 2 98 %x3 = extractelement <4 x i32> %0, i32 3 99 %trunc0 = trunc i32 %x0 to i16 100 %trunc1 = trunc i32 %x1 to i16 101 %trunc2 = trunc i32 %x2 to i16 102 %trunc3 = trunc i32 %x3 to i16 103 %ext0 = zext i16 %trunc0 to i32 104 %ext1 = zext i16 %trunc1 to i32 105 %ext2 = zext i16 %trunc2 to i32 106 %ext3 = zext i16 %trunc3 to i32 107 %v0 = insertelement <4 x i32> undef, i32 %ext0, i32 0 108 %v1 = insertelement <4 x i32> %v0, i32 %ext1, i32 1 109 %v2 = insertelement <4 x i32> %v1, i32 %ext2, i32 2 110 %v3 = insertelement <4 x i32> %v2, i32 %ext3, i32 3 111 ret <4 x i32> %v3 112} 113 114define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind { 115; SSE2-LABEL: _clearupper8xi32a: 116; SSE2: # %bb.0: 117; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,65535,65535,65535] 118; SSE2-NEXT: andps %xmm2, %xmm0 119; SSE2-NEXT: andps %xmm2, %xmm1 120; SSE2-NEXT: retq 121; 122; SSE42-LABEL: _clearupper8xi32a: 123; SSE42: # %bb.0: 124; SSE42-NEXT: pxor %xmm2, %xmm2 125; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] 126; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] 127; SSE42-NEXT: retq 128; 129; AVX1-LABEL: _clearupper8xi32a: 130; AVX1: # %bb.0: 131; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 132; AVX1-NEXT: retq 133; 134; AVX2-LABEL: _clearupper8xi32a: 135; AVX2: # %bb.0: 136; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 137; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 138; AVX2-NEXT: retq 139 %x0 = extractelement <8 x i32> %0, i32 0 140 %x1 = extractelement <8 x i32> %0, i32 1 141 %x2 = extractelement <8 x i32> %0, i32 2 142 %x3 = extractelement <8 x i32> %0, i32 3 143 %x4 = extractelement <8 x i32> %0, i32 4 144 %x5 = extractelement <8 x i32> %0, i32 5 145 %x6 = extractelement <8 x i32> %0, i32 6 146 %x7 = extractelement <8 x i32> %0, i32 7 147 %trunc0 = trunc i32 %x0 to i16 148 %trunc1 = trunc i32 %x1 to i16 149 %trunc2 = trunc i32 %x2 to i16 150 %trunc3 = trunc i32 %x3 to i16 151 %trunc4 = trunc i32 %x4 to i16 152 %trunc5 = trunc i32 %x5 to i16 153 %trunc6 = trunc i32 %x6 to i16 154 %trunc7 = trunc i32 %x7 to i16 155 %ext0 = zext i16 %trunc0 to i32 156 %ext1 = zext i16 %trunc1 to i32 157 %ext2 = zext i16 %trunc2 to i32 158 %ext3 = zext i16 %trunc3 to i32 159 %ext4 = zext i16 %trunc4 to i32 160 %ext5 = zext i16 %trunc5 to i32 161 %ext6 = zext i16 %trunc6 to i32 162 %ext7 = zext i16 %trunc7 to i32 163 %v0 = insertelement <8 x i32> undef, i32 %ext0, i32 0 164 %v1 = insertelement <8 x i32> %v0, i32 %ext1, i32 1 165 %v2 = insertelement <8 x i32> %v1, i32 %ext2, i32 2 166 %v3 = insertelement <8 x i32> %v2, i32 %ext3, i32 3 167 %v4 = insertelement <8 x i32> %v3, i32 %ext4, i32 4 168 %v5 = insertelement <8 x i32> %v4, i32 %ext5, i32 5 169 %v6 = insertelement <8 x i32> %v5, i32 %ext6, i32 6 170 %v7 = insertelement <8 x i32> %v6, i32 %ext7, i32 7 171 ret <8 x i32> %v7 172} 173 174define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind { 175; SSE-LABEL: _clearupper8xi16a: 176; SSE: # %bb.0: 177; SSE-NEXT: andps {{.*}}(%rip), %xmm0 178; SSE-NEXT: retq 179; 180; AVX-LABEL: _clearupper8xi16a: 181; AVX: # %bb.0: 182; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 183; AVX-NEXT: retq 184 %x0 = extractelement <8 x i16> %0, i32 0 185 %x1 = extractelement <8 x i16> %0, i32 1 
186 %x2 = extractelement <8 x i16> %0, i32 2 187 %x3 = extractelement <8 x i16> %0, i32 3 188 %x4 = extractelement <8 x i16> %0, i32 4 189 %x5 = extractelement <8 x i16> %0, i32 5 190 %x6 = extractelement <8 x i16> %0, i32 6 191 %x7 = extractelement <8 x i16> %0, i32 7 192 %trunc0 = trunc i16 %x0 to i8 193 %trunc1 = trunc i16 %x1 to i8 194 %trunc2 = trunc i16 %x2 to i8 195 %trunc3 = trunc i16 %x3 to i8 196 %trunc4 = trunc i16 %x4 to i8 197 %trunc5 = trunc i16 %x5 to i8 198 %trunc6 = trunc i16 %x6 to i8 199 %trunc7 = trunc i16 %x7 to i8 200 %ext0 = zext i8 %trunc0 to i16 201 %ext1 = zext i8 %trunc1 to i16 202 %ext2 = zext i8 %trunc2 to i16 203 %ext3 = zext i8 %trunc3 to i16 204 %ext4 = zext i8 %trunc4 to i16 205 %ext5 = zext i8 %trunc5 to i16 206 %ext6 = zext i8 %trunc6 to i16 207 %ext7 = zext i8 %trunc7 to i16 208 %v0 = insertelement <8 x i16> undef, i16 %ext0, i32 0 209 %v1 = insertelement <8 x i16> %v0, i16 %ext1, i32 1 210 %v2 = insertelement <8 x i16> %v1, i16 %ext2, i32 2 211 %v3 = insertelement <8 x i16> %v2, i16 %ext3, i32 3 212 %v4 = insertelement <8 x i16> %v3, i16 %ext4, i32 4 213 %v5 = insertelement <8 x i16> %v4, i16 %ext5, i32 5 214 %v6 = insertelement <8 x i16> %v5, i16 %ext6, i32 6 215 %v7 = insertelement <8 x i16> %v6, i16 %ext7, i32 7 216 ret <8 x i16> %v7 217} 218 219define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind { 220; SSE-LABEL: _clearupper16xi16a: 221; SSE: # %bb.0: 222; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255] 223; SSE-NEXT: andps %xmm2, %xmm0 224; SSE-NEXT: andps %xmm2, %xmm1 225; SSE-NEXT: retq 226; 227; AVX-LABEL: _clearupper16xi16a: 228; AVX: # %bb.0: 229; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 230; AVX-NEXT: retq 231 %x0 = extractelement <16 x i16> %0, i32 0 232 %x1 = extractelement <16 x i16> %0, i32 1 233 %x2 = extractelement <16 x i16> %0, i32 2 234 %x3 = extractelement <16 x i16> %0, i32 3 235 %x4 = extractelement <16 x i16> %0, i32 4 236 %x5 = extractelement <16 x i16> %0, i32 5 237 %x6 = extractelement <16 x i16> %0, i32 6 238 %x7 = extractelement <16 x i16> %0, i32 7 239 %x8 = extractelement <16 x i16> %0, i32 8 240 %x9 = extractelement <16 x i16> %0, i32 9 241 %x10 = extractelement <16 x i16> %0, i32 10 242 %x11 = extractelement <16 x i16> %0, i32 11 243 %x12 = extractelement <16 x i16> %0, i32 12 244 %x13 = extractelement <16 x i16> %0, i32 13 245 %x14 = extractelement <16 x i16> %0, i32 14 246 %x15 = extractelement <16 x i16> %0, i32 15 247 %trunc0 = trunc i16 %x0 to i8 248 %trunc1 = trunc i16 %x1 to i8 249 %trunc2 = trunc i16 %x2 to i8 250 %trunc3 = trunc i16 %x3 to i8 251 %trunc4 = trunc i16 %x4 to i8 252 %trunc5 = trunc i16 %x5 to i8 253 %trunc6 = trunc i16 %x6 to i8 254 %trunc7 = trunc i16 %x7 to i8 255 %trunc8 = trunc i16 %x8 to i8 256 %trunc9 = trunc i16 %x9 to i8 257 %trunc10 = trunc i16 %x10 to i8 258 %trunc11 = trunc i16 %x11 to i8 259 %trunc12 = trunc i16 %x12 to i8 260 %trunc13 = trunc i16 %x13 to i8 261 %trunc14 = trunc i16 %x14 to i8 262 %trunc15 = trunc i16 %x15 to i8 263 %ext0 = zext i8 %trunc0 to i16 264 %ext1 = zext i8 %trunc1 to i16 265 %ext2 = zext i8 %trunc2 to i16 266 %ext3 = zext i8 %trunc3 to i16 267 %ext4 = zext i8 %trunc4 to i16 268 %ext5 = zext i8 %trunc5 to i16 269 %ext6 = zext i8 %trunc6 to i16 270 %ext7 = zext i8 %trunc7 to i16 271 %ext8 = zext i8 %trunc8 to i16 272 %ext9 = zext i8 %trunc9 to i16 273 %ext10 = zext i8 %trunc10 to i16 274 %ext11 = zext i8 %trunc11 to i16 275 %ext12 = zext i8 %trunc12 to i16 276 %ext13 = zext i8 %trunc13 to i16 277 %ext14 = zext i8 %trunc14 to i16 278 
%ext15 = zext i8 %trunc15 to i16 279 %v0 = insertelement <16 x i16> undef, i16 %ext0, i32 0 280 %v1 = insertelement <16 x i16> %v0, i16 %ext1, i32 1 281 %v2 = insertelement <16 x i16> %v1, i16 %ext2, i32 2 282 %v3 = insertelement <16 x i16> %v2, i16 %ext3, i32 3 283 %v4 = insertelement <16 x i16> %v3, i16 %ext4, i32 4 284 %v5 = insertelement <16 x i16> %v4, i16 %ext5, i32 5 285 %v6 = insertelement <16 x i16> %v5, i16 %ext6, i32 6 286 %v7 = insertelement <16 x i16> %v6, i16 %ext7, i32 7 287 %v8 = insertelement <16 x i16> %v7, i16 %ext8, i32 8 288 %v9 = insertelement <16 x i16> %v8, i16 %ext9, i32 9 289 %v10 = insertelement <16 x i16> %v9, i16 %ext10, i32 10 290 %v11 = insertelement <16 x i16> %v10, i16 %ext11, i32 11 291 %v12 = insertelement <16 x i16> %v11, i16 %ext12, i32 12 292 %v13 = insertelement <16 x i16> %v12, i16 %ext13, i32 13 293 %v14 = insertelement <16 x i16> %v13, i16 %ext14, i32 14 294 %v15 = insertelement <16 x i16> %v14, i16 %ext15, i32 15 295 ret <16 x i16> %v15 296} 297 298define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind { 299; SSE2-LABEL: _clearupper16xi8a: 300; SSE2: # %bb.0: 301; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 302; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 303; SSE2-NEXT: movd %eax, %xmm0 304; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 305; SSE2-NEXT: movd %eax, %xmm1 306; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 307; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 308; SSE2-NEXT: movd %eax, %xmm0 309; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 310; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 311; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 312; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 313; SSE2-NEXT: movd %eax, %xmm0 314; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 315; SSE2-NEXT: movd %eax, %xmm3 316; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 317; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 318; SSE2-NEXT: movd %eax, %xmm0 319; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 320; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 321; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 322; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 323; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 324; SSE2-NEXT: movd %eax, %xmm0 325; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 326; SSE2-NEXT: movd %eax, %xmm2 327; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 328; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 329; SSE2-NEXT: movd %eax, %xmm0 330; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 331; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 332; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 333; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 334; SSE2-NEXT: movd %eax, 
%xmm0 335; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 336; SSE2-NEXT: movd %eax, %xmm2 337; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 338; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 339; SSE2-NEXT: movd %eax, %xmm4 340; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 341; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 342; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 343; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 344; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 345; SSE2-NEXT: pand {{.*}}(%rip), %xmm0 346; SSE2-NEXT: retq 347; 348; SSE42-LABEL: _clearupper16xi8a: 349; SSE42: # %bb.0: 350; SSE42-NEXT: andps {{.*}}(%rip), %xmm0 351; SSE42-NEXT: retq 352; 353; AVX-LABEL: _clearupper16xi8a: 354; AVX: # %bb.0: 355; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 356; AVX-NEXT: retq 357 %x0 = extractelement <16 x i8> %0, i32 0 358 %x1 = extractelement <16 x i8> %0, i32 1 359 %x2 = extractelement <16 x i8> %0, i32 2 360 %x3 = extractelement <16 x i8> %0, i32 3 361 %x4 = extractelement <16 x i8> %0, i32 4 362 %x5 = extractelement <16 x i8> %0, i32 5 363 %x6 = extractelement <16 x i8> %0, i32 6 364 %x7 = extractelement <16 x i8> %0, i32 7 365 %x8 = extractelement <16 x i8> %0, i32 8 366 %x9 = extractelement <16 x i8> %0, i32 9 367 %x10 = extractelement <16 x i8> %0, i32 10 368 %x11 = extractelement <16 x i8> %0, i32 11 369 %x12 = extractelement <16 x i8> %0, i32 12 370 %x13 = extractelement <16 x i8> %0, i32 13 371 %x14 = extractelement <16 x i8> %0, i32 14 372 %x15 = extractelement <16 x i8> %0, i32 15 373 %trunc0 = trunc i8 %x0 to i4 374 %trunc1 = trunc i8 %x1 to i4 375 %trunc2 = trunc i8 %x2 to i4 376 %trunc3 = trunc i8 %x3 to i4 377 %trunc4 = trunc i8 %x4 to i4 378 %trunc5 = trunc i8 %x5 to i4 379 %trunc6 = trunc i8 %x6 to i4 380 %trunc7 = trunc i8 %x7 to i4 381 %trunc8 = trunc i8 %x8 to i4 382 %trunc9 = trunc i8 %x9 to i4 383 %trunc10 = trunc i8 %x10 to i4 384 %trunc11 = trunc i8 %x11 to i4 385 %trunc12 = trunc i8 %x12 to i4 386 %trunc13 = trunc i8 %x13 to i4 387 %trunc14 = trunc i8 %x14 to i4 388 %trunc15 = trunc i8 %x15 to i4 389 %ext0 = zext i4 %trunc0 to i8 390 %ext1 = zext i4 %trunc1 to i8 391 %ext2 = zext i4 %trunc2 to i8 392 %ext3 = zext i4 %trunc3 to i8 393 %ext4 = zext i4 %trunc4 to i8 394 %ext5 = zext i4 %trunc5 to i8 395 %ext6 = zext i4 %trunc6 to i8 396 %ext7 = zext i4 %trunc7 to i8 397 %ext8 = zext i4 %trunc8 to i8 398 %ext9 = zext i4 %trunc9 to i8 399 %ext10 = zext i4 %trunc10 to i8 400 %ext11 = zext i4 %trunc11 to i8 401 %ext12 = zext i4 %trunc12 to i8 402 %ext13 = zext i4 %trunc13 to i8 403 %ext14 = zext i4 %trunc14 to i8 404 %ext15 = zext i4 %trunc15 to i8 405 %v0 = insertelement <16 x i8> undef, i8 %ext0, i32 0 406 %v1 = insertelement <16 x i8> %v0, i8 %ext1, i32 1 407 %v2 = insertelement <16 x i8> %v1, i8 %ext2, i32 2 408 %v3 = insertelement <16 x i8> %v2, i8 %ext3, i32 3 409 %v4 = insertelement <16 x i8> %v3, i8 %ext4, i32 4 410 %v5 = insertelement <16 x i8> %v4, i8 %ext5, i32 5 411 %v6 = insertelement <16 x i8> %v5, i8 %ext6, i32 6 412 %v7 = insertelement <16 x i8> %v6, i8 %ext7, i32 7 413 %v8 = insertelement <16 x i8> %v7, i8 %ext8, i32 8 414 %v9 = insertelement <16 x i8> %v8, i8 %ext9, i32 9 415 %v10 = insertelement <16 x i8> %v9, i8 %ext10, 
i32 10 416 %v11 = insertelement <16 x i8> %v10, i8 %ext11, i32 11 417 %v12 = insertelement <16 x i8> %v11, i8 %ext12, i32 12 418 %v13 = insertelement <16 x i8> %v12, i8 %ext13, i32 13 419 %v14 = insertelement <16 x i8> %v13, i8 %ext14, i32 14 420 %v15 = insertelement <16 x i8> %v14, i8 %ext15, i32 15 421 ret <16 x i8> %v15 422} 423 424define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind { 425; SSE2-LABEL: _clearupper32xi8a: 426; SSE2: # %bb.0: 427; SSE2-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp) 428; SSE2-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp) 429; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 430; SSE2-NEXT: movd %eax, %xmm0 431; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 432; SSE2-NEXT: movd %eax, %xmm1 433; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 434; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 435; SSE2-NEXT: movd %eax, %xmm0 436; SSE2-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero 437; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 438; SSE2-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3] 439; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 440; SSE2-NEXT: movd %eax, %xmm0 441; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 442; SSE2-NEXT: movd %eax, %xmm3 443; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 444; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 445; SSE2-NEXT: movd %eax, %xmm0 446; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 447; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7] 448; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3] 449; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1] 450; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 451; SSE2-NEXT: movd %eax, %xmm0 452; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 453; SSE2-NEXT: movd %eax, %xmm2 454; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 455; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 456; SSE2-NEXT: movd %eax, %xmm0 457; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 458; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7] 459; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3] 460; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 461; SSE2-NEXT: movd %eax, %xmm0 462; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 463; SSE2-NEXT: movd %eax, %xmm2 464; SSE2-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7] 465; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 466; SSE2-NEXT: movd %eax, %xmm4 467; SSE2-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero 468; SSE2-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7] 469; SSE2-NEXT: punpcklwd {{.*#+}} xmm0 = 
xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3] 470; SSE2-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1] 471; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 472; SSE2-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 473; SSE2-NEXT: pand %xmm2, %xmm0 474; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 475; SSE2-NEXT: movd %eax, %xmm1 476; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 477; SSE2-NEXT: movd %eax, %xmm3 478; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 479; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 480; SSE2-NEXT: movd %eax, %xmm1 481; SSE2-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero 482; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] 483; SSE2-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3] 484; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 485; SSE2-NEXT: movd %eax, %xmm1 486; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 487; SSE2-NEXT: movd %eax, %xmm5 488; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] 489; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 490; SSE2-NEXT: movd %eax, %xmm1 491; SSE2-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero 492; SSE2-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7] 493; SSE2-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3] 494; SSE2-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1] 495; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 496; SSE2-NEXT: movd %eax, %xmm1 497; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 498; SSE2-NEXT: movd %eax, %xmm4 499; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] 500; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 501; SSE2-NEXT: movd %eax, %xmm1 502; SSE2-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero 503; SSE2-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7] 504; SSE2-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3] 505; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 506; SSE2-NEXT: movd %eax, %xmm1 507; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 508; SSE2-NEXT: movd %eax, %xmm4 509; SSE2-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7] 510; SSE2-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax 511; SSE2-NEXT: movd %eax, %xmm6 512; SSE2-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero 513; SSE2-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7] 514; SSE2-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3] 515; SSE2-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1] 516; SSE2-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0] 517; SSE2-NEXT: pand %xmm2, %xmm1 518; 
SSE2-NEXT: retq 519; 520; SSE42-LABEL: _clearupper32xi8a: 521; SSE42: # %bb.0: 522; SSE42-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 523; SSE42-NEXT: andps %xmm2, %xmm0 524; SSE42-NEXT: andps %xmm2, %xmm1 525; SSE42-NEXT: retq 526; 527; AVX-LABEL: _clearupper32xi8a: 528; AVX: # %bb.0: 529; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 530; AVX-NEXT: retq 531 %x0 = extractelement <32 x i8> %0, i32 0 532 %x1 = extractelement <32 x i8> %0, i32 1 533 %x2 = extractelement <32 x i8> %0, i32 2 534 %x3 = extractelement <32 x i8> %0, i32 3 535 %x4 = extractelement <32 x i8> %0, i32 4 536 %x5 = extractelement <32 x i8> %0, i32 5 537 %x6 = extractelement <32 x i8> %0, i32 6 538 %x7 = extractelement <32 x i8> %0, i32 7 539 %x8 = extractelement <32 x i8> %0, i32 8 540 %x9 = extractelement <32 x i8> %0, i32 9 541 %x10 = extractelement <32 x i8> %0, i32 10 542 %x11 = extractelement <32 x i8> %0, i32 11 543 %x12 = extractelement <32 x i8> %0, i32 12 544 %x13 = extractelement <32 x i8> %0, i32 13 545 %x14 = extractelement <32 x i8> %0, i32 14 546 %x15 = extractelement <32 x i8> %0, i32 15 547 %x16 = extractelement <32 x i8> %0, i32 16 548 %x17 = extractelement <32 x i8> %0, i32 17 549 %x18 = extractelement <32 x i8> %0, i32 18 550 %x19 = extractelement <32 x i8> %0, i32 19 551 %x20 = extractelement <32 x i8> %0, i32 20 552 %x21 = extractelement <32 x i8> %0, i32 21 553 %x22 = extractelement <32 x i8> %0, i32 22 554 %x23 = extractelement <32 x i8> %0, i32 23 555 %x24 = extractelement <32 x i8> %0, i32 24 556 %x25 = extractelement <32 x i8> %0, i32 25 557 %x26 = extractelement <32 x i8> %0, i32 26 558 %x27 = extractelement <32 x i8> %0, i32 27 559 %x28 = extractelement <32 x i8> %0, i32 28 560 %x29 = extractelement <32 x i8> %0, i32 29 561 %x30 = extractelement <32 x i8> %0, i32 30 562 %x31 = extractelement <32 x i8> %0, i32 31 563 %trunc0 = trunc i8 %x0 to i4 564 %trunc1 = trunc i8 %x1 to i4 565 %trunc2 = trunc i8 %x2 to i4 566 %trunc3 = trunc i8 %x3 to i4 567 %trunc4 = trunc i8 %x4 to i4 568 %trunc5 = trunc i8 %x5 to i4 569 %trunc6 = trunc i8 %x6 to i4 570 %trunc7 = trunc i8 %x7 to i4 571 %trunc8 = trunc i8 %x8 to i4 572 %trunc9 = trunc i8 %x9 to i4 573 %trunc10 = trunc i8 %x10 to i4 574 %trunc11 = trunc i8 %x11 to i4 575 %trunc12 = trunc i8 %x12 to i4 576 %trunc13 = trunc i8 %x13 to i4 577 %trunc14 = trunc i8 %x14 to i4 578 %trunc15 = trunc i8 %x15 to i4 579 %trunc16 = trunc i8 %x16 to i4 580 %trunc17 = trunc i8 %x17 to i4 581 %trunc18 = trunc i8 %x18 to i4 582 %trunc19 = trunc i8 %x19 to i4 583 %trunc20 = trunc i8 %x20 to i4 584 %trunc21 = trunc i8 %x21 to i4 585 %trunc22 = trunc i8 %x22 to i4 586 %trunc23 = trunc i8 %x23 to i4 587 %trunc24 = trunc i8 %x24 to i4 588 %trunc25 = trunc i8 %x25 to i4 589 %trunc26 = trunc i8 %x26 to i4 590 %trunc27 = trunc i8 %x27 to i4 591 %trunc28 = trunc i8 %x28 to i4 592 %trunc29 = trunc i8 %x29 to i4 593 %trunc30 = trunc i8 %x30 to i4 594 %trunc31 = trunc i8 %x31 to i4 595 %ext0 = zext i4 %trunc0 to i8 596 %ext1 = zext i4 %trunc1 to i8 597 %ext2 = zext i4 %trunc2 to i8 598 %ext3 = zext i4 %trunc3 to i8 599 %ext4 = zext i4 %trunc4 to i8 600 %ext5 = zext i4 %trunc5 to i8 601 %ext6 = zext i4 %trunc6 to i8 602 %ext7 = zext i4 %trunc7 to i8 603 %ext8 = zext i4 %trunc8 to i8 604 %ext9 = zext i4 %trunc9 to i8 605 %ext10 = zext i4 %trunc10 to i8 606 %ext11 = zext i4 %trunc11 to i8 607 %ext12 = zext i4 %trunc12 to i8 608 %ext13 = zext i4 %trunc13 to i8 609 %ext14 = zext i4 %trunc14 to i8 610 %ext15 = zext i4 %trunc15 to i8 611 %ext16 = zext i4 
%trunc16 to i8 612 %ext17 = zext i4 %trunc17 to i8 613 %ext18 = zext i4 %trunc18 to i8 614 %ext19 = zext i4 %trunc19 to i8 615 %ext20 = zext i4 %trunc20 to i8 616 %ext21 = zext i4 %trunc21 to i8 617 %ext22 = zext i4 %trunc22 to i8 618 %ext23 = zext i4 %trunc23 to i8 619 %ext24 = zext i4 %trunc24 to i8 620 %ext25 = zext i4 %trunc25 to i8 621 %ext26 = zext i4 %trunc26 to i8 622 %ext27 = zext i4 %trunc27 to i8 623 %ext28 = zext i4 %trunc28 to i8 624 %ext29 = zext i4 %trunc29 to i8 625 %ext30 = zext i4 %trunc30 to i8 626 %ext31 = zext i4 %trunc31 to i8 627 %v0 = insertelement <32 x i8> undef, i8 %ext0, i32 0 628 %v1 = insertelement <32 x i8> %v0, i8 %ext1, i32 1 629 %v2 = insertelement <32 x i8> %v1, i8 %ext2, i32 2 630 %v3 = insertelement <32 x i8> %v2, i8 %ext3, i32 3 631 %v4 = insertelement <32 x i8> %v3, i8 %ext4, i32 4 632 %v5 = insertelement <32 x i8> %v4, i8 %ext5, i32 5 633 %v6 = insertelement <32 x i8> %v5, i8 %ext6, i32 6 634 %v7 = insertelement <32 x i8> %v6, i8 %ext7, i32 7 635 %v8 = insertelement <32 x i8> %v7, i8 %ext8, i32 8 636 %v9 = insertelement <32 x i8> %v8, i8 %ext9, i32 9 637 %v10 = insertelement <32 x i8> %v9, i8 %ext10, i32 10 638 %v11 = insertelement <32 x i8> %v10, i8 %ext11, i32 11 639 %v12 = insertelement <32 x i8> %v11, i8 %ext12, i32 12 640 %v13 = insertelement <32 x i8> %v12, i8 %ext13, i32 13 641 %v14 = insertelement <32 x i8> %v13, i8 %ext14, i32 14 642 %v15 = insertelement <32 x i8> %v14, i8 %ext15, i32 15 643 %v16 = insertelement <32 x i8> %v15, i8 %ext16, i32 16 644 %v17 = insertelement <32 x i8> %v16, i8 %ext17, i32 17 645 %v18 = insertelement <32 x i8> %v17, i8 %ext18, i32 18 646 %v19 = insertelement <32 x i8> %v18, i8 %ext19, i32 19 647 %v20 = insertelement <32 x i8> %v19, i8 %ext20, i32 20 648 %v21 = insertelement <32 x i8> %v20, i8 %ext21, i32 21 649 %v22 = insertelement <32 x i8> %v21, i8 %ext22, i32 22 650 %v23 = insertelement <32 x i8> %v22, i8 %ext23, i32 23 651 %v24 = insertelement <32 x i8> %v23, i8 %ext24, i32 24 652 %v25 = insertelement <32 x i8> %v24, i8 %ext25, i32 25 653 %v26 = insertelement <32 x i8> %v25, i8 %ext26, i32 26 654 %v27 = insertelement <32 x i8> %v26, i8 %ext27, i32 27 655 %v28 = insertelement <32 x i8> %v27, i8 %ext28, i32 28 656 %v29 = insertelement <32 x i8> %v28, i8 %ext29, i32 29 657 %v30 = insertelement <32 x i8> %v29, i8 %ext30, i32 30 658 %v31 = insertelement <32 x i8> %v30, i8 %ext31, i32 31 659 ret <32 x i8> %v31 660} 661 662define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind { 663; SSE2-LABEL: _clearupper2xi64b: 664; SSE2: # %bb.0: 665; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 666; SSE2-NEXT: retq 667; 668; SSE42-LABEL: _clearupper2xi64b: 669; SSE42: # %bb.0: 670; SSE42-NEXT: xorps %xmm1, %xmm1 671; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 672; SSE42-NEXT: retq 673; 674; AVX-LABEL: _clearupper2xi64b: 675; AVX: # %bb.0: 676; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 677; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 678; AVX-NEXT: retq 679 %x32 = bitcast <2 x i64> %0 to <4 x i32> 680 %r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1 681 %r1 = insertelement <4 x i32> %r0, i32 zeroinitializer, i32 3 682 %r = bitcast <4 x i32> %r1 to <2 x i64> 683 ret <2 x i64> %r 684} 685 686define <4 x i64> @_clearupper4xi64b(<4 x i64>) nounwind { 687; SSE2-LABEL: _clearupper4xi64b: 688; SSE2: # %bb.0: 689; SSE2-NEXT: movaps {{.*#+}} xmm2 690; SSE2-NEXT: andps %xmm2, %xmm0 691; SSE2-NEXT: andps %xmm2, %xmm1 692; SSE2-NEXT: retq 693; 694; SSE42-LABEL: _clearupper4xi64b: 695; SSE42: 
# %bb.0: 696; SSE42-NEXT: xorps %xmm2, %xmm2 697; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3] 698; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3] 699; SSE42-NEXT: retq 700; 701; AVX-LABEL: _clearupper4xi64b: 702; AVX: # %bb.0: 703; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1 704; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 705; AVX-NEXT: retq 706 %x32 = bitcast <4 x i64> %0 to <8 x i32> 707 %r0 = insertelement <8 x i32> %x32, i32 zeroinitializer, i32 1 708 %r1 = insertelement <8 x i32> %r0, i32 zeroinitializer, i32 3 709 %r2 = insertelement <8 x i32> %r1, i32 zeroinitializer, i32 5 710 %r3 = insertelement <8 x i32> %r2, i32 zeroinitializer, i32 7 711 %r = bitcast <8 x i32> %r3 to <4 x i64> 712 ret <4 x i64> %r 713} 714 715define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind { 716; SSE2-LABEL: _clearupper4xi32b: 717; SSE2: # %bb.0: 718; SSE2-NEXT: andps {{.*}}(%rip), %xmm0 719; SSE2-NEXT: retq 720; 721; SSE42-LABEL: _clearupper4xi32b: 722; SSE42: # %bb.0: 723; SSE42-NEXT: pxor %xmm1, %xmm1 724; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 725; SSE42-NEXT: retq 726; 727; AVX-LABEL: _clearupper4xi32b: 728; AVX: # %bb.0: 729; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 730; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 731; AVX-NEXT: retq 732 %x16 = bitcast <4 x i32> %0 to <8 x i16> 733 %r0 = insertelement <8 x i16> %x16, i16 zeroinitializer, i32 1 734 %r1 = insertelement <8 x i16> %r0, i16 zeroinitializer, i32 3 735 %r2 = insertelement <8 x i16> %r1, i16 zeroinitializer, i32 5 736 %r3 = insertelement <8 x i16> %r2, i16 zeroinitializer, i32 7 737 %r = bitcast <8 x i16> %r3 to <4 x i32> 738 ret <4 x i32> %r 739} 740 741define <8 x i32> @_clearupper8xi32b(<8 x i32>) nounwind { 742; SSE2-LABEL: _clearupper8xi32b: 743; SSE2: # %bb.0: 744; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0] 745; SSE2-NEXT: andps %xmm2, %xmm0 746; SSE2-NEXT: andps %xmm2, %xmm1 747; SSE2-NEXT: retq 748; 749; SSE42-LABEL: _clearupper8xi32b: 750; SSE42: # %bb.0: 751; SSE42-NEXT: pxor %xmm2, %xmm2 752; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7] 753; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7] 754; SSE42-NEXT: retq 755; 756; AVX1-LABEL: _clearupper8xi32b: 757; AVX1: # %bb.0: 758; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 759; AVX1-NEXT: retq 760; 761; AVX2-LABEL: _clearupper8xi32b: 762; AVX2: # %bb.0: 763; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 764; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 765; AVX2-NEXT: retq 766 %x16 = bitcast <8 x i32> %0 to <16 x i16> 767 %r0 = insertelement <16 x i16> %x16, i16 zeroinitializer, i32 1 768 %r1 = insertelement <16 x i16> %r0, i16 zeroinitializer, i32 3 769 %r2 = insertelement <16 x i16> %r1, i16 zeroinitializer, i32 5 770 %r3 = insertelement <16 x i16> %r2, i16 zeroinitializer, i32 7 771 %r4 = insertelement <16 x i16> %r3, i16 zeroinitializer, i32 9 772 %r5 = insertelement <16 x i16> %r4, i16 zeroinitializer, i32 11 773 %r6 = insertelement <16 x i16> %r5, i16 zeroinitializer, i32 13 774 %r7 = insertelement <16 x i16> %r6, i16 zeroinitializer, i32 15 775 %r = bitcast <16 x i16> %r7 to <8 x i32> 776 ret <8 x i32> %r 777} 778 779define <8 x 
i16> @_clearupper8xi16b(<8 x i16>) nounwind { 780; SSE-LABEL: _clearupper8xi16b: 781; SSE: # %bb.0: 782; SSE-NEXT: andps {{.*}}(%rip), %xmm0 783; SSE-NEXT: retq 784; 785; AVX-LABEL: _clearupper8xi16b: 786; AVX: # %bb.0: 787; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 788; AVX-NEXT: retq 789 %x8 = bitcast <8 x i16> %0 to <16 x i8> 790 %r0 = insertelement <16 x i8> %x8, i8 zeroinitializer, i32 1 791 %r1 = insertelement <16 x i8> %r0, i8 zeroinitializer, i32 3 792 %r2 = insertelement <16 x i8> %r1, i8 zeroinitializer, i32 5 793 %r3 = insertelement <16 x i8> %r2, i8 zeroinitializer, i32 7 794 %r4 = insertelement <16 x i8> %r3, i8 zeroinitializer, i32 9 795 %r5 = insertelement <16 x i8> %r4, i8 zeroinitializer, i32 11 796 %r6 = insertelement <16 x i8> %r5, i8 zeroinitializer, i32 13 797 %r7 = insertelement <16 x i8> %r6, i8 zeroinitializer, i32 15 798 %r = bitcast <16 x i8> %r7 to <8 x i16> 799 ret <8 x i16> %r 800} 801 802define <16 x i16> @_clearupper16xi16b(<16 x i16>) nounwind { 803; SSE-LABEL: _clearupper16xi16b: 804; SSE: # %bb.0: 805; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 806; SSE-NEXT: andps %xmm2, %xmm0 807; SSE-NEXT: andps %xmm2, %xmm1 808; SSE-NEXT: retq 809; 810; AVX-LABEL: _clearupper16xi16b: 811; AVX: # %bb.0: 812; AVX-NEXT: vmovaps {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 813; AVX-NEXT: vandps %xmm1, %xmm0, %xmm2 814; AVX-NEXT: vextractf128 $1, %ymm0, %xmm0 815; AVX-NEXT: vandps %xmm1, %xmm0, %xmm0 816; AVX-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0 817; AVX-NEXT: retq 818 %x8 = bitcast <16 x i16> %0 to <32 x i8> 819 %r0 = insertelement <32 x i8> %x8, i8 zeroinitializer, i32 1 820 %r1 = insertelement <32 x i8> %r0, i8 zeroinitializer, i32 3 821 %r2 = insertelement <32 x i8> %r1, i8 zeroinitializer, i32 5 822 %r3 = insertelement <32 x i8> %r2, i8 zeroinitializer, i32 7 823 %r4 = insertelement <32 x i8> %r3, i8 zeroinitializer, i32 9 824 %r5 = insertelement <32 x i8> %r4, i8 zeroinitializer, i32 11 825 %r6 = insertelement <32 x i8> %r5, i8 zeroinitializer, i32 13 826 %r7 = insertelement <32 x i8> %r6, i8 zeroinitializer, i32 15 827 %r8 = insertelement <32 x i8> %r7, i8 zeroinitializer, i32 17 828 %r9 = insertelement <32 x i8> %r8, i8 zeroinitializer, i32 19 829 %r10 = insertelement <32 x i8> %r9, i8 zeroinitializer, i32 21 830 %r11 = insertelement <32 x i8> %r10, i8 zeroinitializer, i32 23 831 %r12 = insertelement <32 x i8> %r11, i8 zeroinitializer, i32 25 832 %r13 = insertelement <32 x i8> %r12, i8 zeroinitializer, i32 27 833 %r14 = insertelement <32 x i8> %r13, i8 zeroinitializer, i32 29 834 %r15 = insertelement <32 x i8> %r14, i8 zeroinitializer, i32 31 835 %r = bitcast <32 x i8> %r15 to <16 x i16> 836 ret <16 x i16> %r 837} 838 839define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind { 840; SSE2-LABEL: _clearupper16xi8b: 841; SSE2: # %bb.0: 842; SSE2-NEXT: pushq %rbx 843; SSE2-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1] 844; SSE2-NEXT: movq %xmm1, %r10 845; SSE2-NEXT: movq %r10, %r8 846; SSE2-NEXT: shrq $56, %r8 847; SSE2-NEXT: andl $15, %r8d 848; SSE2-NEXT: movq %r10, %r9 849; SSE2-NEXT: shrq $48, %r9 850; SSE2-NEXT: andl $15, %r9d 851; SSE2-NEXT: movq %r10, %rsi 852; SSE2-NEXT: shrq $40, %rsi 853; SSE2-NEXT: andl $15, %esi 854; SSE2-NEXT: movq %r10, %r11 855; SSE2-NEXT: shrq $32, %r11 856; SSE2-NEXT: andl $15, %r11d 857; SSE2-NEXT: movq %xmm0, %rax 858; SSE2-NEXT: movq %rax, %rdx 859; SSE2-NEXT: shrq $56, %rdx 860; SSE2-NEXT: andl $15, %edx 861; SSE2-NEXT: movq %rax, %rcx 862; SSE2-NEXT: shrq $48, %rcx 
863; SSE2-NEXT: andl $15, %ecx 864; SSE2-NEXT: movq %rax, %rdi 865; SSE2-NEXT: shrq $40, %rdi 866; SSE2-NEXT: andl $15, %edi 867; SSE2-NEXT: movq %rax, %rbx 868; SSE2-NEXT: shrq $32, %rbx 869; SSE2-NEXT: andl $15, %ebx 870; SSE2-NEXT: shlq $32, %rbx 871; SSE2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 872; SSE2-NEXT: orq %rbx, %rax 873; SSE2-NEXT: shlq $40, %rdi 874; SSE2-NEXT: orq %rax, %rdi 875; SSE2-NEXT: shlq $48, %rcx 876; SSE2-NEXT: orq %rdi, %rcx 877; SSE2-NEXT: shlq $56, %rdx 878; SSE2-NEXT: orq %rcx, %rdx 879; SSE2-NEXT: shlq $32, %r11 880; SSE2-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F 881; SSE2-NEXT: orq %r11, %r10 882; SSE2-NEXT: shlq $40, %rsi 883; SSE2-NEXT: orq %r10, %rsi 884; SSE2-NEXT: shlq $48, %r9 885; SSE2-NEXT: orq %rsi, %r9 886; SSE2-NEXT: shlq $56, %r8 887; SSE2-NEXT: orq %r9, %r8 888; SSE2-NEXT: movq %rdx, %xmm0 889; SSE2-NEXT: movq %r8, %xmm1 890; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 891; SSE2-NEXT: popq %rbx 892; SSE2-NEXT: retq 893; 894; SSE42-LABEL: _clearupper16xi8b: 895; SSE42: # %bb.0: 896; SSE42-NEXT: pushq %rbx 897; SSE42-NEXT: pextrq $1, %xmm0, %r10 898; SSE42-NEXT: movq %r10, %r8 899; SSE42-NEXT: shrq $56, %r8 900; SSE42-NEXT: andl $15, %r8d 901; SSE42-NEXT: movq %r10, %r9 902; SSE42-NEXT: shrq $48, %r9 903; SSE42-NEXT: andl $15, %r9d 904; SSE42-NEXT: movq %r10, %rsi 905; SSE42-NEXT: shrq $40, %rsi 906; SSE42-NEXT: andl $15, %esi 907; SSE42-NEXT: movq %r10, %r11 908; SSE42-NEXT: shrq $32, %r11 909; SSE42-NEXT: andl $15, %r11d 910; SSE42-NEXT: movq %xmm0, %rax 911; SSE42-NEXT: movq %rax, %rdx 912; SSE42-NEXT: shrq $56, %rdx 913; SSE42-NEXT: andl $15, %edx 914; SSE42-NEXT: movq %rax, %rcx 915; SSE42-NEXT: shrq $48, %rcx 916; SSE42-NEXT: andl $15, %ecx 917; SSE42-NEXT: movq %rax, %rdi 918; SSE42-NEXT: shrq $40, %rdi 919; SSE42-NEXT: andl $15, %edi 920; SSE42-NEXT: movq %rax, %rbx 921; SSE42-NEXT: shrq $32, %rbx 922; SSE42-NEXT: andl $15, %ebx 923; SSE42-NEXT: shlq $32, %rbx 924; SSE42-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 925; SSE42-NEXT: orq %rbx, %rax 926; SSE42-NEXT: shlq $40, %rdi 927; SSE42-NEXT: orq %rax, %rdi 928; SSE42-NEXT: shlq $48, %rcx 929; SSE42-NEXT: orq %rdi, %rcx 930; SSE42-NEXT: shlq $56, %rdx 931; SSE42-NEXT: orq %rcx, %rdx 932; SSE42-NEXT: shlq $32, %r11 933; SSE42-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F 934; SSE42-NEXT: orq %r11, %r10 935; SSE42-NEXT: shlq $40, %rsi 936; SSE42-NEXT: orq %r10, %rsi 937; SSE42-NEXT: shlq $48, %r9 938; SSE42-NEXT: orq %rsi, %r9 939; SSE42-NEXT: shlq $56, %r8 940; SSE42-NEXT: orq %r9, %r8 941; SSE42-NEXT: movq %r8, %xmm1 942; SSE42-NEXT: movq %rdx, %xmm0 943; SSE42-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 944; SSE42-NEXT: popq %rbx 945; SSE42-NEXT: retq 946; 947; AVX-LABEL: _clearupper16xi8b: 948; AVX: # %bb.0: 949; AVX-NEXT: pushq %rbx 950; AVX-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 951; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %r9 952; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rdx 953; AVX-NEXT: movq %r9, %r8 954; AVX-NEXT: shrq $56, %r8 955; AVX-NEXT: andl $15, %r8d 956; AVX-NEXT: movq %r9, %r10 957; AVX-NEXT: shrq $48, %r10 958; AVX-NEXT: andl $15, %r10d 959; AVX-NEXT: movq %r9, %rsi 960; AVX-NEXT: shrq $40, %rsi 961; AVX-NEXT: andl $15, %esi 962; AVX-NEXT: movq %r9, %r11 963; AVX-NEXT: shrq $32, %r11 964; AVX-NEXT: andl $15, %r11d 965; AVX-NEXT: movq %rdx, %rdi 966; AVX-NEXT: shrq $56, %rdi 967; AVX-NEXT: andl $15, %edi 968; AVX-NEXT: movq %rdx, %rax 969; AVX-NEXT: shrq $48, %rax 970; AVX-NEXT: andl $15, %eax 971; AVX-NEXT: movq %rdx, %rcx 972; AVX-NEXT: shrq $40, %rcx 
973; AVX-NEXT: andl $15, %ecx 974; AVX-NEXT: movq %rdx, %rbx 975; AVX-NEXT: shrq $32, %rbx 976; AVX-NEXT: andl $15, %ebx 977; AVX-NEXT: shlq $32, %rbx 978; AVX-NEXT: andl $252645135, %edx # imm = 0xF0F0F0F 979; AVX-NEXT: orq %rbx, %rdx 980; AVX-NEXT: shlq $40, %rcx 981; AVX-NEXT: orq %rdx, %rcx 982; AVX-NEXT: shlq $48, %rax 983; AVX-NEXT: orq %rcx, %rax 984; AVX-NEXT: shlq $56, %rdi 985; AVX-NEXT: orq %rax, %rdi 986; AVX-NEXT: movq %rdi, -{{[0-9]+}}(%rsp) 987; AVX-NEXT: shlq $32, %r11 988; AVX-NEXT: andl $252645135, %r9d # imm = 0xF0F0F0F 989; AVX-NEXT: orq %r11, %r9 990; AVX-NEXT: shlq $40, %rsi 991; AVX-NEXT: orq %r9, %rsi 992; AVX-NEXT: shlq $48, %r10 993; AVX-NEXT: orq %rsi, %r10 994; AVX-NEXT: shlq $56, %r8 995; AVX-NEXT: orq %r10, %r8 996; AVX-NEXT: movq %r8, -{{[0-9]+}}(%rsp) 997; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 998; AVX-NEXT: popq %rbx 999; AVX-NEXT: retq 1000 %x4 = bitcast <16 x i8> %0 to <32 x i4> 1001 %r0 = insertelement <32 x i4> %x4, i4 zeroinitializer, i32 1 1002 %r1 = insertelement <32 x i4> %r0, i4 zeroinitializer, i32 3 1003 %r2 = insertelement <32 x i4> %r1, i4 zeroinitializer, i32 5 1004 %r3 = insertelement <32 x i4> %r2, i4 zeroinitializer, i32 7 1005 %r4 = insertelement <32 x i4> %r3, i4 zeroinitializer, i32 9 1006 %r5 = insertelement <32 x i4> %r4, i4 zeroinitializer, i32 11 1007 %r6 = insertelement <32 x i4> %r5, i4 zeroinitializer, i32 13 1008 %r7 = insertelement <32 x i4> %r6, i4 zeroinitializer, i32 15 1009 %r8 = insertelement <32 x i4> %r7, i4 zeroinitializer, i32 17 1010 %r9 = insertelement <32 x i4> %r8, i4 zeroinitializer, i32 19 1011 %r10 = insertelement <32 x i4> %r9, i4 zeroinitializer, i32 21 1012 %r11 = insertelement <32 x i4> %r10, i4 zeroinitializer, i32 23 1013 %r12 = insertelement <32 x i4> %r11, i4 zeroinitializer, i32 25 1014 %r13 = insertelement <32 x i4> %r12, i4 zeroinitializer, i32 27 1015 %r14 = insertelement <32 x i4> %r13, i4 zeroinitializer, i32 29 1016 %r15 = insertelement <32 x i4> %r14, i4 zeroinitializer, i32 31 1017 %r = bitcast <32 x i4> %r15 to <16 x i8> 1018 ret <16 x i8> %r 1019} 1020 1021define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind { 1022; SSE2-LABEL: _clearupper32xi8b: 1023; SSE2: # %bb.0: 1024; SSE2-NEXT: pushq %rbx 1025; SSE2-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 1026; SSE2-NEXT: movq %xmm2, %r10 1027; SSE2-NEXT: movq %r10, %r8 1028; SSE2-NEXT: shrq $56, %r8 1029; SSE2-NEXT: andl $15, %r8d 1030; SSE2-NEXT: movq %r10, %r9 1031; SSE2-NEXT: shrq $48, %r9 1032; SSE2-NEXT: andl $15, %r9d 1033; SSE2-NEXT: movq %r10, %rsi 1034; SSE2-NEXT: shrq $40, %rsi 1035; SSE2-NEXT: andl $15, %esi 1036; SSE2-NEXT: movq %r10, %r11 1037; SSE2-NEXT: shrq $32, %r11 1038; SSE2-NEXT: andl $15, %r11d 1039; SSE2-NEXT: movq %xmm0, %rax 1040; SSE2-NEXT: movq %rax, %rdx 1041; SSE2-NEXT: shrq $56, %rdx 1042; SSE2-NEXT: andl $15, %edx 1043; SSE2-NEXT: movq %rax, %rcx 1044; SSE2-NEXT: shrq $48, %rcx 1045; SSE2-NEXT: andl $15, %ecx 1046; SSE2-NEXT: movq %rax, %rdi 1047; SSE2-NEXT: shrq $40, %rdi 1048; SSE2-NEXT: andl $15, %edi 1049; SSE2-NEXT: movq %rax, %rbx 1050; SSE2-NEXT: shrq $32, %rbx 1051; SSE2-NEXT: andl $15, %ebx 1052; SSE2-NEXT: shlq $32, %rbx 1053; SSE2-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 1054; SSE2-NEXT: orq %rbx, %rax 1055; SSE2-NEXT: shlq $40, %rdi 1056; SSE2-NEXT: orq %rax, %rdi 1057; SSE2-NEXT: shlq $48, %rcx 1058; SSE2-NEXT: orq %rdi, %rcx 1059; SSE2-NEXT: shlq $56, %rdx 1060; SSE2-NEXT: orq %rcx, %rdx 1061; SSE2-NEXT: shlq $32, %r11 1062; SSE2-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F 1063; SSE2-NEXT: 
orq %r11, %r10 1064; SSE2-NEXT: shlq $40, %rsi 1065; SSE2-NEXT: orq %r10, %rsi 1066; SSE2-NEXT: shlq $48, %r9 1067; SSE2-NEXT: orq %rsi, %r9 1068; SSE2-NEXT: shlq $56, %r8 1069; SSE2-NEXT: orq %r9, %r8 1070; SSE2-NEXT: movq %rdx, %xmm0 1071; SSE2-NEXT: movq %r8, %xmm2 1072; SSE2-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 1073; SSE2-NEXT: popq %rbx 1074; SSE2-NEXT: retq 1075; 1076; SSE42-LABEL: _clearupper32xi8b: 1077; SSE42: # %bb.0: 1078; SSE42-NEXT: pushq %rbx 1079; SSE42-NEXT: pextrq $1, %xmm0, %r10 1080; SSE42-NEXT: movq %r10, %r8 1081; SSE42-NEXT: shrq $56, %r8 1082; SSE42-NEXT: andl $15, %r8d 1083; SSE42-NEXT: movq %r10, %r9 1084; SSE42-NEXT: shrq $48, %r9 1085; SSE42-NEXT: andl $15, %r9d 1086; SSE42-NEXT: movq %r10, %rsi 1087; SSE42-NEXT: shrq $40, %rsi 1088; SSE42-NEXT: andl $15, %esi 1089; SSE42-NEXT: movq %r10, %r11 1090; SSE42-NEXT: shrq $32, %r11 1091; SSE42-NEXT: andl $15, %r11d 1092; SSE42-NEXT: movq %xmm0, %rax 1093; SSE42-NEXT: movq %rax, %rdx 1094; SSE42-NEXT: shrq $56, %rdx 1095; SSE42-NEXT: andl $15, %edx 1096; SSE42-NEXT: movq %rax, %rcx 1097; SSE42-NEXT: shrq $48, %rcx 1098; SSE42-NEXT: andl $15, %ecx 1099; SSE42-NEXT: movq %rax, %rdi 1100; SSE42-NEXT: shrq $40, %rdi 1101; SSE42-NEXT: andl $15, %edi 1102; SSE42-NEXT: movq %rax, %rbx 1103; SSE42-NEXT: shrq $32, %rbx 1104; SSE42-NEXT: andl $15, %ebx 1105; SSE42-NEXT: shlq $32, %rbx 1106; SSE42-NEXT: andl $252645135, %eax # imm = 0xF0F0F0F 1107; SSE42-NEXT: orq %rbx, %rax 1108; SSE42-NEXT: shlq $40, %rdi 1109; SSE42-NEXT: orq %rax, %rdi 1110; SSE42-NEXT: shlq $48, %rcx 1111; SSE42-NEXT: orq %rdi, %rcx 1112; SSE42-NEXT: shlq $56, %rdx 1113; SSE42-NEXT: orq %rcx, %rdx 1114; SSE42-NEXT: shlq $32, %r11 1115; SSE42-NEXT: andl $252645135, %r10d # imm = 0xF0F0F0F 1116; SSE42-NEXT: orq %r11, %r10 1117; SSE42-NEXT: shlq $40, %rsi 1118; SSE42-NEXT: orq %r10, %rsi 1119; SSE42-NEXT: shlq $48, %r9 1120; SSE42-NEXT: orq %rsi, %r9 1121; SSE42-NEXT: shlq $56, %r8 1122; SSE42-NEXT: orq %r9, %r8 1123; SSE42-NEXT: movq %r8, %xmm2 1124; SSE42-NEXT: movq %rdx, %xmm0 1125; SSE42-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 1126; SSE42-NEXT: popq %rbx 1127; SSE42-NEXT: retq 1128; 1129; AVX1-LABEL: _clearupper32xi8b: 1130; AVX1: # %bb.0: 1131; AVX1-NEXT: vmovaps %xmm0, -{{[0-9]+}}(%rsp) 1132; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %r9 1133; AVX1-NEXT: movq -{{[0-9]+}}(%rsp), %rcx 1134; AVX1-NEXT: movq %r9, %r8 1135; AVX1-NEXT: shrq $56, %r8 1136; AVX1-NEXT: andl $15, %r8d 1137; AVX1-NEXT: movq %rcx, %rsi 1138; AVX1-NEXT: movq %rcx, %rdi 1139; AVX1-NEXT: movq %rcx, %rdx 1140; AVX1-NEXT: movq %rcx, %rax 1141; AVX1-NEXT: shrq $32, %rax 1142; AVX1-NEXT: andl $15, %eax 1143; AVX1-NEXT: shlq $32, %rax 1144; AVX1-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 1145; AVX1-NEXT: orq %rax, %rcx 1146; AVX1-NEXT: movq %r9, %rax 1147; AVX1-NEXT: shrq $48, %rax 1148; AVX1-NEXT: andl $15, %eax 1149; AVX1-NEXT: shrq $40, %rdx 1150; AVX1-NEXT: andl $15, %edx 1151; AVX1-NEXT: shlq $40, %rdx 1152; AVX1-NEXT: orq %rcx, %rdx 1153; AVX1-NEXT: movq %r9, %rcx 1154; AVX1-NEXT: shrq $40, %rcx 1155; AVX1-NEXT: andl $15, %ecx 1156; AVX1-NEXT: shrq $48, %rdi 1157; AVX1-NEXT: andl $15, %edi 1158; AVX1-NEXT: shlq $48, %rdi 1159; AVX1-NEXT: orq %rdx, %rdi 1160; AVX1-NEXT: movq %r9, %rdx 1161; AVX1-NEXT: shrq $32, %rdx 1162; AVX1-NEXT: andl $15, %edx 1163; AVX1-NEXT: shrq $56, %rsi 1164; AVX1-NEXT: andl $15, %esi 1165; AVX1-NEXT: shlq $56, %rsi 1166; AVX1-NEXT: orq %rdi, %rsi 1167; AVX1-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) 1168; AVX1-NEXT: shlq $32, %rdx 1169; AVX1-NEXT: andl 
$252645135, %r9d # imm = 0xF0F0F0F 1170; AVX1-NEXT: orq %rdx, %r9 1171; AVX1-NEXT: shlq $40, %rcx 1172; AVX1-NEXT: orq %r9, %rcx 1173; AVX1-NEXT: shlq $48, %rax 1174; AVX1-NEXT: orq %rcx, %rax 1175; AVX1-NEXT: shlq $56, %r8 1176; AVX1-NEXT: orq %rax, %r8 1177; AVX1-NEXT: movq %r8, -{{[0-9]+}}(%rsp) 1178; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1179; AVX1-NEXT: vmovq %xmm0, %rax 1180; AVX1-NEXT: movq %rax, %r8 1181; AVX1-NEXT: movq %rax, %r9 1182; AVX1-NEXT: movq %rax, %rsi 1183; AVX1-NEXT: movq %rax, %rdi 1184; AVX1-NEXT: movl %eax, %ecx 1185; AVX1-NEXT: movl %eax, %edx 1186; AVX1-NEXT: vmovd %eax, %xmm1 1187; AVX1-NEXT: shrl $8, %eax 1188; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1189; AVX1-NEXT: shrl $16, %edx 1190; AVX1-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1 1191; AVX1-NEXT: shrl $24, %ecx 1192; AVX1-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 1193; AVX1-NEXT: shrq $32, %rdi 1194; AVX1-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1 1195; AVX1-NEXT: shrq $40, %rsi 1196; AVX1-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1 1197; AVX1-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm2 1198; AVX1-NEXT: shrq $48, %r9 1199; AVX1-NEXT: vpinsrb $6, %r9d, %xmm1, %xmm1 1200; AVX1-NEXT: vpextrq $1, %xmm0, %rax 1201; AVX1-NEXT: shrq $56, %r8 1202; AVX1-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0 1203; AVX1-NEXT: movl %eax, %ecx 1204; AVX1-NEXT: shrl $8, %ecx 1205; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 1206; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 1207; AVX1-NEXT: movl %eax, %ecx 1208; AVX1-NEXT: shrl $16, %ecx 1209; AVX1-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 1210; AVX1-NEXT: movl %eax, %ecx 1211; AVX1-NEXT: shrl $24, %ecx 1212; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 1213; AVX1-NEXT: movq %rax, %rcx 1214; AVX1-NEXT: shrq $32, %rcx 1215; AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 1216; AVX1-NEXT: movq %rax, %rcx 1217; AVX1-NEXT: shrq $40, %rcx 1218; AVX1-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 1219; AVX1-NEXT: movq %rax, %rcx 1220; AVX1-NEXT: shrq $48, %rcx 1221; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 1222; AVX1-NEXT: vmovq %xmm2, %rcx 1223; AVX1-NEXT: shrq $56, %rax 1224; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 1225; AVX1-NEXT: movl %ecx, %eax 1226; AVX1-NEXT: shrl $8, %eax 1227; AVX1-NEXT: vmovd %ecx, %xmm1 1228; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1229; AVX1-NEXT: movl %ecx, %eax 1230; AVX1-NEXT: shrl $16, %eax 1231; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 1232; AVX1-NEXT: movl %ecx, %eax 1233; AVX1-NEXT: shrl $24, %eax 1234; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 1235; AVX1-NEXT: movq %rcx, %rax 1236; AVX1-NEXT: shrq $32, %rax 1237; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 1238; AVX1-NEXT: movq %rcx, %rax 1239; AVX1-NEXT: shrq $40, %rax 1240; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 1241; AVX1-NEXT: movq %rcx, %rax 1242; AVX1-NEXT: shrq $48, %rax 1243; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 1244; AVX1-NEXT: vpextrq $1, %xmm2, %rax 1245; AVX1-NEXT: shrq $56, %rcx 1246; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 1247; AVX1-NEXT: movl %eax, %ecx 1248; AVX1-NEXT: shrl $8, %ecx 1249; AVX1-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 1250; AVX1-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 1251; AVX1-NEXT: movl %eax, %ecx 1252; AVX1-NEXT: shrl $16, %ecx 1253; AVX1-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 1254; AVX1-NEXT: movl %eax, %ecx 1255; AVX1-NEXT: shrl $24, %ecx 1256; AVX1-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 1257; AVX1-NEXT: movq %rax, %rcx 1258; AVX1-NEXT: shrq $32, %rcx 1259; AVX1-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 1260; AVX1-NEXT: movq %rax, %rcx 1261; AVX1-NEXT: shrq $40, %rcx 1262; AVX1-NEXT: vpinsrb $13, %ecx, %xmm1, 
%xmm1 1263; AVX1-NEXT: movq %rax, %rcx 1264; AVX1-NEXT: shrq $48, %rcx 1265; AVX1-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 1266; AVX1-NEXT: shrq $56, %rax 1267; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 1268; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1269; AVX1-NEXT: retq 1270; 1271; AVX2-LABEL: _clearupper32xi8b: 1272; AVX2: # %bb.0: 1273; AVX2-NEXT: vmovdqa %xmm0, -{{[0-9]+}}(%rsp) 1274; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %r9 1275; AVX2-NEXT: movq -{{[0-9]+}}(%rsp), %rcx 1276; AVX2-NEXT: movq %r9, %r8 1277; AVX2-NEXT: shrq $56, %r8 1278; AVX2-NEXT: andl $15, %r8d 1279; AVX2-NEXT: movq %rcx, %rsi 1280; AVX2-NEXT: movq %rcx, %rdi 1281; AVX2-NEXT: movq %rcx, %rdx 1282; AVX2-NEXT: movq %rcx, %rax 1283; AVX2-NEXT: shrq $32, %rax 1284; AVX2-NEXT: andl $15, %eax 1285; AVX2-NEXT: shlq $32, %rax 1286; AVX2-NEXT: andl $252645135, %ecx # imm = 0xF0F0F0F 1287; AVX2-NEXT: orq %rax, %rcx 1288; AVX2-NEXT: movq %r9, %rax 1289; AVX2-NEXT: shrq $48, %rax 1290; AVX2-NEXT: andl $15, %eax 1291; AVX2-NEXT: shrq $40, %rdx 1292; AVX2-NEXT: andl $15, %edx 1293; AVX2-NEXT: shlq $40, %rdx 1294; AVX2-NEXT: orq %rcx, %rdx 1295; AVX2-NEXT: movq %r9, %rcx 1296; AVX2-NEXT: shrq $40, %rcx 1297; AVX2-NEXT: andl $15, %ecx 1298; AVX2-NEXT: shrq $48, %rdi 1299; AVX2-NEXT: andl $15, %edi 1300; AVX2-NEXT: shlq $48, %rdi 1301; AVX2-NEXT: orq %rdx, %rdi 1302; AVX2-NEXT: movq %r9, %rdx 1303; AVX2-NEXT: shrq $32, %rdx 1304; AVX2-NEXT: andl $15, %edx 1305; AVX2-NEXT: shrq $56, %rsi 1306; AVX2-NEXT: andl $15, %esi 1307; AVX2-NEXT: shlq $56, %rsi 1308; AVX2-NEXT: orq %rdi, %rsi 1309; AVX2-NEXT: movq %rsi, -{{[0-9]+}}(%rsp) 1310; AVX2-NEXT: shlq $32, %rdx 1311; AVX2-NEXT: andl $252645135, %r9d # imm = 0xF0F0F0F 1312; AVX2-NEXT: orq %rdx, %r9 1313; AVX2-NEXT: shlq $40, %rcx 1314; AVX2-NEXT: orq %r9, %rcx 1315; AVX2-NEXT: shlq $48, %rax 1316; AVX2-NEXT: orq %rcx, %rax 1317; AVX2-NEXT: shlq $56, %r8 1318; AVX2-NEXT: orq %rax, %r8 1319; AVX2-NEXT: movq %r8, -{{[0-9]+}}(%rsp) 1320; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1321; AVX2-NEXT: vmovq %xmm0, %rax 1322; AVX2-NEXT: movq %rax, %r8 1323; AVX2-NEXT: movq %rax, %r9 1324; AVX2-NEXT: movq %rax, %rsi 1325; AVX2-NEXT: movq %rax, %rdi 1326; AVX2-NEXT: movl %eax, %ecx 1327; AVX2-NEXT: movl %eax, %edx 1328; AVX2-NEXT: vmovd %eax, %xmm1 1329; AVX2-NEXT: shrl $8, %eax 1330; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1331; AVX2-NEXT: shrl $16, %edx 1332; AVX2-NEXT: vpinsrb $2, %edx, %xmm1, %xmm1 1333; AVX2-NEXT: shrl $24, %ecx 1334; AVX2-NEXT: vpinsrb $3, %ecx, %xmm1, %xmm1 1335; AVX2-NEXT: shrq $32, %rdi 1336; AVX2-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1 1337; AVX2-NEXT: shrq $40, %rsi 1338; AVX2-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1 1339; AVX2-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm2 1340; AVX2-NEXT: shrq $48, %r9 1341; AVX2-NEXT: vpinsrb $6, %r9d, %xmm1, %xmm1 1342; AVX2-NEXT: vpextrq $1, %xmm0, %rax 1343; AVX2-NEXT: shrq $56, %r8 1344; AVX2-NEXT: vpinsrb $7, %r8d, %xmm1, %xmm0 1345; AVX2-NEXT: movl %eax, %ecx 1346; AVX2-NEXT: shrl $8, %ecx 1347; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 1348; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 1349; AVX2-NEXT: movl %eax, %ecx 1350; AVX2-NEXT: shrl $16, %ecx 1351; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 1352; AVX2-NEXT: movl %eax, %ecx 1353; AVX2-NEXT: shrl $24, %ecx 1354; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 1355; AVX2-NEXT: movq %rax, %rcx 1356; AVX2-NEXT: shrq $32, %rcx 1357; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 1358; AVX2-NEXT: movq %rax, %rcx 1359; AVX2-NEXT: shrq $40, %rcx 1360; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 1361; 
AVX2-NEXT: movq %rax, %rcx 1362; AVX2-NEXT: shrq $48, %rcx 1363; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 1364; AVX2-NEXT: vmovq %xmm2, %rcx 1365; AVX2-NEXT: shrq $56, %rax 1366; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 1367; AVX2-NEXT: movl %ecx, %eax 1368; AVX2-NEXT: shrl $8, %eax 1369; AVX2-NEXT: vmovd %ecx, %xmm1 1370; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1371; AVX2-NEXT: movl %ecx, %eax 1372; AVX2-NEXT: shrl $16, %eax 1373; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 1374; AVX2-NEXT: movl %ecx, %eax 1375; AVX2-NEXT: shrl $24, %eax 1376; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 1377; AVX2-NEXT: movq %rcx, %rax 1378; AVX2-NEXT: shrq $32, %rax 1379; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 1380; AVX2-NEXT: movq %rcx, %rax 1381; AVX2-NEXT: shrq $40, %rax 1382; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 1383; AVX2-NEXT: movq %rcx, %rax 1384; AVX2-NEXT: shrq $48, %rax 1385; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 1386; AVX2-NEXT: vpextrq $1, %xmm2, %rax 1387; AVX2-NEXT: shrq $56, %rcx 1388; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 1389; AVX2-NEXT: movl %eax, %ecx 1390; AVX2-NEXT: shrl $8, %ecx 1391; AVX2-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 1392; AVX2-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 1393; AVX2-NEXT: movl %eax, %ecx 1394; AVX2-NEXT: shrl $16, %ecx 1395; AVX2-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 1396; AVX2-NEXT: movl %eax, %ecx 1397; AVX2-NEXT: shrl $24, %ecx 1398; AVX2-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 1399; AVX2-NEXT: movq %rax, %rcx 1400; AVX2-NEXT: shrq $32, %rcx 1401; AVX2-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 1402; AVX2-NEXT: movq %rax, %rcx 1403; AVX2-NEXT: shrq $40, %rcx 1404; AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 1405; AVX2-NEXT: movq %rax, %rcx 1406; AVX2-NEXT: shrq $48, %rcx 1407; AVX2-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 1408; AVX2-NEXT: shrq $56, %rax 1409; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 1410; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1411; AVX2-NEXT: retq 1412 %x4 = bitcast <32 x i8> %0 to <64 x i4> 1413 %r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1 1414 %r1 = insertelement <64 x i4> %r0, i4 zeroinitializer, i32 3 1415 %r2 = insertelement <64 x i4> %r1, i4 zeroinitializer, i32 5 1416 %r3 = insertelement <64 x i4> %r2, i4 zeroinitializer, i32 7 1417 %r4 = insertelement <64 x i4> %r3, i4 zeroinitializer, i32 9 1418 %r5 = insertelement <64 x i4> %r4, i4 zeroinitializer, i32 11 1419 %r6 = insertelement <64 x i4> %r5, i4 zeroinitializer, i32 13 1420 %r7 = insertelement <64 x i4> %r6, i4 zeroinitializer, i32 15 1421 %r8 = insertelement <64 x i4> %r7, i4 zeroinitializer, i32 17 1422 %r9 = insertelement <64 x i4> %r8, i4 zeroinitializer, i32 19 1423 %r10 = insertelement <64 x i4> %r9, i4 zeroinitializer, i32 21 1424 %r11 = insertelement <64 x i4> %r10, i4 zeroinitializer, i32 23 1425 %r12 = insertelement <64 x i4> %r11, i4 zeroinitializer, i32 25 1426 %r13 = insertelement <64 x i4> %r12, i4 zeroinitializer, i32 27 1427 %r14 = insertelement <64 x i4> %r13, i4 zeroinitializer, i32 29 1428 %r15 = insertelement <64 x i4> %r14, i4 zeroinitializer, i32 31 1429 %r16 = insertelement <64 x i4> %r15, i4 zeroinitializer, i32 33 1430 %r17 = insertelement <64 x i4> %r16, i4 zeroinitializer, i32 35 1431 %r18 = insertelement <64 x i4> %r17, i4 zeroinitializer, i32 37 1432 %r19 = insertelement <64 x i4> %r18, i4 zeroinitializer, i32 39 1433 %r20 = insertelement <64 x i4> %r19, i4 zeroinitializer, i32 41 1434 %r21 = insertelement <64 x i4> %r20, i4 zeroinitializer, i32 43 1435 %r22 = insertelement <64 x i4> %r21, i4 zeroinitializer, i32 45 1436 
  %r23 = insertelement <64 x i4> %r22, i4 zeroinitializer, i32 47
  %r24 = insertelement <64 x i4> %r23, i4 zeroinitializer, i32 49
  %r25 = insertelement <64 x i4> %r24, i4 zeroinitializer, i32 51
  %r26 = insertelement <64 x i4> %r25, i4 zeroinitializer, i32 53
  %r27 = insertelement <64 x i4> %r26, i4 zeroinitializer, i32 55
  %r28 = insertelement <64 x i4> %r27, i4 zeroinitializer, i32 57
  %r29 = insertelement <64 x i4> %r28, i4 zeroinitializer, i32 59
  %r30 = insertelement <64 x i4> %r29, i4 zeroinitializer, i32 61
  %r31 = insertelement <64 x i4> %r30, i4 zeroinitializer, i32 63
  %r = bitcast <64 x i4> %r15 to <32 x i8>
  ret <32 x i8> %r
}

define <2 x i64> @_clearupper2xi64c(<2 x i64>) nounwind {
; SSE2-LABEL: _clearupper2xi64c:
; SSE2: # %bb.0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper2xi64c:
; SSE42: # %bb.0:
; SSE42-NEXT: xorps %xmm1, %xmm1
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper2xi64c:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX-NEXT: retq
  %r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0
  ret <2 x i64> %r
}

define <4 x i64> @_clearupper4xi64c(<4 x i64>) nounwind {
; SSE2-LABEL: _clearupper4xi64c:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper4xi64c:
; SSE42: # %bb.0:
; SSE42-NEXT: xorps %xmm2, %xmm2
; SSE42-NEXT: blendps {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3]
; SSE42-NEXT: blendps {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper4xi64c:
; AVX: # %bb.0:
; AVX-NEXT: vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX-NEXT: retq
  %r = and <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>, %0
  ret <4 x i64> %r
}

define <4 x i32> @_clearupper4xi32c(<4 x i32>) nounwind {
; SSE2-LABEL: _clearupper4xi32c:
; SSE2: # %bb.0:
; SSE2-NEXT: andps {{.*}}(%rip), %xmm0
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper4xi32c:
; SSE42: # %bb.0:
; SSE42-NEXT: pxor %xmm1, %xmm1
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; SSE42-NEXT: retq
;
; AVX-LABEL: _clearupper4xi32c:
; AVX: # %bb.0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT: retq
  %r = and <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, %0
  ret <4 x i32> %r
}

define <8 x i32> @_clearupper8xi32c(<8 x i32>) nounwind {
; SSE2-LABEL: _clearupper8xi32c:
; SSE2: # %bb.0:
; SSE2-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE2-NEXT: andps %xmm2, %xmm0
; SSE2-NEXT: andps %xmm2, %xmm1
; SSE2-NEXT: retq
;
; SSE42-LABEL: _clearupper8xi32c:
; SSE42: # %bb.0:
; SSE42-NEXT: pxor %xmm2, %xmm2
; SSE42-NEXT: pblendw {{.*#+}} xmm0 = xmm0[0],xmm2[1],xmm0[2],xmm2[3],xmm0[4],xmm2[5],xmm0[6],xmm2[7]
; SSE42-NEXT: pblendw {{.*#+}} xmm1 = xmm1[0],xmm2[1],xmm1[2],xmm2[3],xmm1[4],xmm2[5],xmm1[6],xmm2[7]
; SSE42-NEXT: retq
;
; AVX1-LABEL: _clearupper8xi32c:
; AVX1: # %bb.0:
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper8xi32c:
; AVX2: # %bb.0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
  %r = and <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>, %0
  ret <8 x i32> %r
}

define <8 x i16> @_clearupper8xi16c(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16c:
; SSE: # %bb.0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper8xi16c:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %r = and <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
  ret <8 x i16> %r
}

define <16 x i16> @_clearupper16xi16c(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi16c:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
  %r = and <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0
  ret <16 x i16> %r
}

define <16 x i8> @_clearupper16xi8c(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8c:
; SSE: # %bb.0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi8c:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %r = and <16 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
  ret <16 x i8> %r
}

define <32 x i8> @_clearupper32xi8c(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8c:
; SSE: # %bb.0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper32xi8c:
; AVX: # %bb.0:
; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
  %r = and <32 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0
  ret <32 x i8> %r
}
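
;
; Note: the @_clearupper*c tests above drive the same 'clear upper bits'
; lowering from an explicit per-element 'and' with a constant mask. For
; reference, each mask value is the all-ones lower half of its element type:
;   4294967295 = 0xFFFFFFFF (low 32 bits of an i64)
;   65535      = 0xFFFF     (low 16 bits of an i32)
;   255        = 0xFF       (low 8 bits of an i16)
;   15         = 0xF        (low 4 bits of an i8)
; On plain SSE2 these fold into an andps against a constant-pool load, while
; several of the i64/i32 cases on SSE4.2/AVX are matched as a blend against a
; zeroed register instead; the i16/i8 masks remain a plain vector and.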