; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown | FileCheck %s --check-prefix=SSE --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=AVX --check-prefix=AVX2

;
; PR6455 'Clear Upper Bits' Patterns
;
; The 'a' functions below all use the same scalarised IR shape: each vector
; element is extracted, truncated to half its width, zero-extended back to the
; original width and reinserted at the same index. The net effect is that the
; upper half of every element is cleared. The check lines record the code that
; llc emitted for each of the three run configurations above.
;

; <2 x i64>: trunc to i32 + zext, i.e. keep the low 32 bits of each lane.
define <2 x i64> @_clearupper2xi64a(<2 x i64>) nounwind {
; SSE-LABEL: _clearupper2xi64a:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper2xi64a:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper2xi64a:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: retq
  %x0 = extractelement <2 x i64> %0, i32 0
  %x1 = extractelement <2 x i64> %0, i32 1
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  %v0 = insertelement <2 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <2 x i64> %v0, i64 %ext1, i32 1
  ret <2 x i64> %v1
}

; <4 x i64>: same pattern as above on a 256-bit vector (two xmm registers in
; the SSE run, one ymm register in the AVX runs).
define <4 x i64> @_clearupper4xi64a(<4 x i64>) nounwind {
; SSE-LABEL: _clearupper4xi64a:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [4294967295,4294967295]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper4xi64a:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper4xi64a:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
  %x0 = extractelement <4 x i64> %0, i32 0
  %x1 = extractelement <4 x i64> %0, i32 1
  %x2 = extractelement <4 x i64> %0, i32 2
  %x3 = extractelement <4 x i64> %0, i32 3
  %trunc0 = trunc i64 %x0 to i32
  %trunc1 = trunc i64 %x1 to i32
  %trunc2 = trunc i64 %x2 to i32
  %trunc3 = trunc i64 %x3 to i32
  %ext0 = zext i32 %trunc0 to i64
  %ext1 = zext i32 %trunc1 to i64
  %ext2 = zext i32 %trunc2 to i64
  %ext3 = zext i32 %trunc3 to i64
  %v0 = insertelement <4 x i64> undef, i64 %ext0, i32 0
  %v1 = insertelement <4 x i64> %v0, i64 %ext1, i32 1
  %v2 = insertelement <4 x i64> %v1, i64 %ext2, i32 2
  %v3 = insertelement <4 x i64> %v2, i64 %ext3, i32 3
  ret <4 x i64> %v3
}

; <4 x i32>: trunc to i16 + zext, i.e. keep the low 16 bits of each lane.
define <4 x i32> @_clearupper4xi32a(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32a:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper4xi32a:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT: retq
  %x0 = extractelement <4 x i32> %0, i32 0
  %x1 = extractelement <4 x i32> %0, i32 1
  %x2 = extractelement <4 x i32> %0, i32 2
  %x3 = extractelement <4 x i32> %0, i32 3
  %trunc0 = trunc i32 %x0 to i16
  %trunc1 = trunc i32 %x1 to i16
  %trunc2 = trunc i32 %x2 to i16
  %trunc3 = trunc i32 %x3 to i16
  %ext0 = zext i16 %trunc0 to i32
  %ext1 = zext i16 %trunc1 to i32
  %ext2 = zext i16 %trunc2 to i32
  %ext3 = zext i16 %trunc3 to i32
  %v0 = insertelement <4 x i32> undef, i32 %ext0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %ext1, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %ext2, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 %ext3, i32 3
  ret <4 x i32> %v3
}

; <8 x i32>: 256-bit version of the i32 -> i16 -> i32 pattern.
define <8 x i32> @_clearupper8xi32a(<8 x i32>) nounwind {
; SSE-LABEL: _clearupper8xi32a:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [65535,65535,65535,65535]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper8xi32a:
; AVX1: # BB#0:
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper8xi32a:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
  %x0 = extractelement <8 x i32> %0, i32 0
  %x1 = extractelement <8 x i32> %0, i32 1
  %x2 = extractelement <8 x i32> %0, i32 2
  %x3 = extractelement <8 x i32> %0, i32 3
  %x4 = extractelement <8 x i32> %0, i32 4
  %x5 = extractelement <8 x i32> %0, i32 5
  %x6 = extractelement <8 x i32> %0, i32 6
  %x7 = extractelement <8 x i32> %0, i32 7
  %trunc0 = trunc i32 %x0 to i16
  %trunc1 = trunc i32 %x1 to i16
  %trunc2 = trunc i32 %x2 to i16
  %trunc3 = trunc i32 %x3 to i16
  %trunc4 = trunc i32 %x4 to i16
  %trunc5 = trunc i32 %x5 to i16
  %trunc6 = trunc i32 %x6 to i16
  %trunc7 = trunc i32 %x7 to i16
  %ext0 = zext i16 %trunc0 to i32
  %ext1 = zext i16 %trunc1 to i32
  %ext2 = zext i16 %trunc2 to i32
  %ext3 = zext i16 %trunc3 to i32
  %ext4 = zext i16 %trunc4 to i32
  %ext5 = zext i16 %trunc5 to i32
  %ext6 = zext i16 %trunc6 to i32
  %ext7 = zext i16 %trunc7 to i32
  %v0 = insertelement <8 x i32> undef, i32 %ext0, i32 0
  %v1 = insertelement <8 x i32> %v0, i32 %ext1, i32 1
  %v2 = insertelement <8 x i32> %v1, i32 %ext2, i32 2
  %v3 = insertelement <8 x i32> %v2, i32 %ext3, i32 3
  %v4 = insertelement <8 x i32> %v3, i32 %ext4, i32 4
  %v5 = insertelement <8 x i32> %v4, i32 %ext5, i32 5
  %v6 = insertelement <8 x i32> %v5, i32 %ext6, i32 6
  %v7 = insertelement <8 x i32> %v6, i32 %ext7, i32 7
  ret <8 x i32> %v7
}

; <8 x i16>: each element is extracted, truncated to i8, zero-extended back to
; i16 and reinserted, i.e. the upper 8 bits of every lane are cleared.
define <8 x i16> @_clearupper8xi16a(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16a:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper8xi16a:
; AVX: # BB#0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x0 = extractelement <8 x i16> %0, i32 0
  %x1 = extractelement <8 x i16> %0, i32 1
  %x2 = extractelement <8 x i16> %0, i32 2
  %x3 = extractelement <8 x i16> %0, i32 3
  %x4 = extractelement <8 x i16> %0, i32 4
  %x5 = extractelement <8 x i16> %0, i32 5
  %x6 = extractelement <8 x i16> %0, i32 6
  %x7 = extractelement <8 x i16> %0, i32 7
  %trunc0 = trunc i16 %x0 to i8
  %trunc1 = trunc i16 %x1 to i8
  %trunc2 = trunc i16 %x2 to i8
  %trunc3 = trunc i16 %x3 to i8
  %trunc4 = trunc i16 %x4 to i8
  %trunc5 = trunc i16 %x5 to i8
  %trunc6 = trunc i16 %x6 to i8
  %trunc7 = trunc i16 %x7 to i8
  %ext0 = zext i8 %trunc0 to i16
  %ext1 = zext i8 %trunc1 to i16
  %ext2 = zext i8 %trunc2 to i16
  %ext3 = zext i8 %trunc3 to i16
  %ext4 = zext i8 %trunc4 to i16
  %ext5 = zext i8 %trunc5 to i16
  %ext6 = zext i8 %trunc6 to i16
  %ext7 = zext i8 %trunc7 to i16
  %v0 = insertelement <8 x i16> undef, i16 %ext0, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %ext1, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %ext2, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %ext3, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %ext4, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %ext5, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %ext6, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %ext7, i32 7
  ret <8 x i16> %v7
}

; <16 x i16>: 256-bit version of the i16 -> i8 -> i16 pattern.
define <16 x i16> @_clearupper16xi16a(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16a:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,255,255,255,255,255,255,255]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi16a:
; AVX: # BB#0:
; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
  %x0 = extractelement <16 x i16> %0, i32 0
  %x1 = extractelement <16 x i16> %0, i32 1
  %x2 = extractelement <16 x i16> %0, i32 2
  %x3 = extractelement <16 x i16> %0, i32 3
  %x4 = extractelement <16 x i16> %0, i32 4
  %x5 = extractelement <16 x i16> %0, i32 5
  %x6 = extractelement <16 x i16> %0, i32 6
  %x7 = extractelement <16 x i16> %0, i32 7
  %x8 = extractelement <16 x i16> %0, i32 8
  %x9 = extractelement <16 x i16> %0, i32 9
  %x10 = extractelement <16 x i16> %0, i32 10
  %x11 = extractelement <16 x i16> %0, i32 11
  %x12 = extractelement <16 x i16> %0, i32 12
  %x13 = extractelement <16 x i16> %0, i32 13
  %x14 = extractelement <16 x i16> %0, i32 14
  %x15 = extractelement <16 x i16> %0, i32 15
  %trunc0 = trunc i16 %x0 to i8
  %trunc1 = trunc i16 %x1 to i8
  %trunc2 = trunc i16 %x2 to i8
  %trunc3 = trunc i16 %x3 to i8
  %trunc4 = trunc i16 %x4 to i8
  %trunc5 = trunc i16 %x5 to i8
  %trunc6 = trunc i16 %x6 to i8
  %trunc7 = trunc i16 %x7 to i8
  %trunc8 = trunc i16 %x8 to i8
  %trunc9 = trunc i16 %x9 to i8
  %trunc10 = trunc i16 %x10 to i8
  %trunc11 = trunc i16 %x11 to i8
  %trunc12 = trunc i16 %x12 to i8
  %trunc13 = trunc i16 %x13 to i8
  %trunc14 = trunc i16 %x14 to i8
  %trunc15 = trunc i16 %x15 to i8
  %ext0 = zext i8 %trunc0 to i16
  %ext1 = zext i8 %trunc1 to i16
  %ext2 = zext i8 %trunc2 to i16
  %ext3 = zext i8 %trunc3 to i16
  %ext4 = zext i8 %trunc4 to i16
  %ext5 = zext i8 %trunc5 to i16
  %ext6 = zext i8 %trunc6 to i16
  %ext7 = zext i8 %trunc7 to i16
  %ext8 = zext i8 %trunc8 to i16
  %ext9 = zext i8 %trunc9 to i16
  %ext10 = zext i8 %trunc10 to i16
  %ext11 = zext i8 %trunc11 to i16
  %ext12 = zext i8 %trunc12 to i16
  %ext13 = zext i8 %trunc13 to i16
  %ext14 = zext i8 %trunc14 to i16
  %ext15 = zext i8 %trunc15 to i16
  %v0 = insertelement <16 x i16> undef, i16 %ext0, i32 0
  %v1 = insertelement <16 x i16> %v0, i16 %ext1, i32 1
  %v2 = insertelement <16 x i16> %v1, i16 %ext2, i32 2
  %v3 = insertelement <16 x i16> %v2, i16 %ext3, i32 3
  %v4 = insertelement <16 x i16> %v3, i16 %ext4, i32 4
  %v5 = insertelement <16 x i16> %v4, i16 %ext5, i32 5
  %v6 = insertelement <16 x i16> %v5, i16 %ext6, i32 6
  %v7 = insertelement <16 x i16> %v6, i16 %ext7, i32 7
  %v8 = insertelement <16 x i16> %v7, i16 %ext8, i32 8
  %v9 = insertelement <16 x i16> %v8, i16 %ext9, i32 9
  %v10 = insertelement <16 x i16> %v9, i16 %ext10, i32 10
  %v11 = insertelement <16 x i16> %v10, i16 %ext11, i32 11
  %v12 = insertelement <16 x i16> %v11, i16 %ext12, i32 12
  %v13 = insertelement <16 x i16> %v12, i16 %ext13, i32 13
  %v14 = insertelement <16 x i16> %v13, i16 %ext14, i32 14
  %v15 = insertelement <16 x i16> %v14, i16 %ext15, i32 15
  ret <16 x i16> %v15
}

; <16 x i8>: each byte is truncated to i4 and zero-extended back, i.e. the
; upper 4 bits of every lane are cleared. The checks for the plain SSE run
; record a long stack-spill / movzbl / punpck rebuild followed by a pand,
; whereas the AVX runs record a single vandps.
define <16 x i8> @_clearupper16xi8a(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8a:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: pand {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper16xi8a:
; AVX: # BB#0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x0 = extractelement <16 x i8> %0, i32 0
  %x1 = extractelement <16 x i8> %0, i32 1
  %x2 = extractelement <16 x i8> %0, i32 2
  %x3 = extractelement <16 x i8> %0, i32 3
  %x4 = extractelement <16 x i8> %0, i32 4
  %x5 = extractelement <16 x i8> %0, i32 5
  %x6 = extractelement <16 x i8> %0, i32 6
  %x7 = extractelement <16 x i8> %0, i32 7
  %x8 = extractelement <16 x i8> %0, i32 8
  %x9 = extractelement <16 x i8> %0, i32 9
  %x10 = extractelement <16 x i8> %0, i32 10
  %x11 = extractelement <16 x i8> %0, i32 11
  %x12 = extractelement <16 x i8> %0, i32 12
  %x13 = extractelement <16 x i8> %0, i32 13
  %x14 = extractelement <16 x i8> %0, i32 14
  %x15 = extractelement <16 x i8> %0, i32 15
  %trunc0 = trunc i8 %x0 to i4
  %trunc1 = trunc i8 %x1 to i4
  %trunc2 = trunc i8 %x2 to i4
  %trunc3 = trunc i8 %x3 to i4
  %trunc4 = trunc i8 %x4 to i4
  %trunc5 = trunc i8 %x5 to i4
  %trunc6 = trunc i8 %x6 to i4
  %trunc7 = trunc i8 %x7 to i4
  %trunc8 = trunc i8 %x8 to i4
  %trunc9 = trunc i8 %x9 to i4
  %trunc10 = trunc i8 %x10 to i4
  %trunc11 = trunc i8 %x11 to i4
  %trunc12 = trunc i8 %x12 to i4
  %trunc13 = trunc i8 %x13 to i4
  %trunc14 = trunc i8 %x14 to i4
  %trunc15 = trunc i8 %x15 to i4
  %ext0 = zext i4 %trunc0 to i8
  %ext1 = zext i4 %trunc1 to i8
  %ext2 = zext i4 %trunc2 to i8
  %ext3 = zext i4 %trunc3 to i8
  %ext4 = zext i4 %trunc4 to i8
  %ext5 = zext i4 %trunc5 to i8
  %ext6 = zext i4 %trunc6 to i8
  %ext7 = zext i4 %trunc7 to i8
  %ext8 = zext i4 %trunc8 to i8
  %ext9 = zext i4 %trunc9 to i8
  %ext10 = zext i4 %trunc10 to i8
  %ext11 = zext i4 %trunc11 to i8
  %ext12 = zext i4 %trunc12 to i8
  %ext13 = zext i4 %trunc13 to i8
  %ext14 = zext i4 %trunc14 to i8
  %ext15 = zext i4 %trunc15 to i8
  %v0 = insertelement <16 x i8> undef, i8 %ext0, i32 0
  %v1 = insertelement <16 x i8> %v0, i8 %ext1, i32 1
  %v2 = insertelement <16 x i8> %v1, i8 %ext2, i32 2
  %v3 = insertelement <16 x i8> %v2, i8 %ext3, i32 3
  %v4 = insertelement <16 x i8> %v3, i8 %ext4, i32 4
  %v5 = insertelement <16 x i8> %v4, i8 %ext5, i32 5
  %v6 = insertelement <16 x i8> %v5, i8 %ext6, i32 6
  %v7 = insertelement <16 x i8> %v6, i8 %ext7, i32 7
  %v8 = insertelement <16 x i8> %v7, i8 %ext8, i32 8
  %v9 = insertelement <16 x i8> %v8, i8 %ext9, i32 9
  %v10 = insertelement <16 x i8> %v9, i8 %ext10, i32 10
  %v11 = insertelement <16 x i8> %v10, i8 %ext11, i32 11
  %v12 = insertelement <16 x i8> %v11, i8 %ext12, i32 12
  %v13 = insertelement <16 x i8> %v12, i8 %ext13, i32 13
  %v14 = insertelement <16 x i8> %v13, i8 %ext14, i32 14
  %v15 = insertelement <16 x i8> %v14, i8 %ext15, i32 15
  ret <16 x i8> %v15
}

; <32 x i8>: 256-bit version of the i8 -> i4 -> i8 pattern. As with the
; 128-bit case, the plain SSE checks record a per-byte rebuild of both halves
; before masking with a splat of 15; the AVX runs record a single vandps.
define <32 x i8> @_clearupper32xi8a(<32 x i8>) nounwind {
; SSE-LABEL: _clearupper32xi8a:
; SSE: # BB#0:
; SSE-NEXT: movaps %xmm0, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movaps %xmm1, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd {{.*#+}} xmm2 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm1[0],xmm2[1],xmm1[1],xmm2[2],xmm1[2],xmm2[3],xmm1[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3],xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm3[0],xmm1[1],xmm3[1],xmm1[2],xmm3[2],xmm1[3],xmm3[3]
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm0[0],xmm3[1],xmm0[1],xmm3[2],xmm0[2],xmm3[3],xmm0[3],xmm3[4],xmm0[4],xmm3[5],xmm0[5],xmm3[6],xmm0[6],xmm3[7],xmm0[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm2[0],xmm3[1],xmm2[1],xmm3[2],xmm2[2],xmm3[3],xmm2[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm2
; SSE-NEXT: punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: movd {{.*#+}} xmm0 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm4[0],xmm0[1],xmm4[1],xmm0[2],xmm4[2],xmm0[3],xmm4[3],xmm0[4],xmm4[4],xmm0[5],xmm4[5],xmm0[6],xmm4[6],xmm0[7],xmm4[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE-NEXT: punpckldq {{.*#+}} xmm0 = xmm0[0],xmm3[0],xmm0[1],xmm3[1]
; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE-NEXT: movdqa {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15]
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm3
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movd {{.*#+}} xmm4 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1],xmm4[2],xmm3[2],xmm4[3],xmm3[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm5
; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movd {{.*#+}} xmm3 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm3 = xmm3[0],xmm1[0],xmm3[1],xmm1[1],xmm3[2],xmm1[2],xmm3[3],xmm1[3],xmm3[4],xmm1[4],xmm3[5],xmm1[5],xmm3[6],xmm1[6],xmm3[7],xmm1[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm3 = xmm3[0],xmm5[0],xmm3[1],xmm5[1],xmm3[2],xmm5[2],xmm3[3],xmm5[3]
; SSE-NEXT: punpckldq {{.*#+}} xmm3 = xmm3[0],xmm4[0],xmm3[1],xmm4[1]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movd {{.*#+}} xmm5 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm5 = xmm5[0],xmm1[0],xmm5[1],xmm1[1],xmm5[2],xmm1[2],xmm5[3],xmm1[3],xmm5[4],xmm1[4],xmm5[5],xmm1[5],xmm5[6],xmm1[6],xmm5[7],xmm1[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm5 = xmm5[0],xmm4[0],xmm5[1],xmm4[1],xmm5[2],xmm4[2],xmm5[3],xmm4[3]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm1
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm4
; SSE-NEXT: punpcklbw {{.*#+}} xmm4 = xmm4[0],xmm1[0],xmm4[1],xmm1[1],xmm4[2],xmm1[2],xmm4[3],xmm1[3],xmm4[4],xmm1[4],xmm4[5],xmm1[5],xmm4[6],xmm1[6],xmm4[7],xmm1[7]
; SSE-NEXT: movzbl -{{[0-9]+}}(%rsp), %eax
; SSE-NEXT: movd %eax, %xmm6
; SSE-NEXT: movd {{.*#+}} xmm1 = mem[0],zero,zero,zero
; SSE-NEXT: punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm6[0],xmm1[1],xmm6[1],xmm1[2],xmm6[2],xmm1[3],xmm6[3],xmm1[4],xmm6[4],xmm1[5],xmm6[5],xmm1[6],xmm6[6],xmm1[7],xmm6[7]
; SSE-NEXT: punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE-NEXT: punpckldq {{.*#+}} xmm1 = xmm1[0],xmm5[0],xmm1[1],xmm5[1]
; SSE-NEXT: punpcklqdq {{.*#+}} xmm1 = xmm1[0],xmm3[0]
; SSE-NEXT: pand %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper32xi8a:
; AVX: # BB#0:
; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX-NEXT: retq
  %x0 = extractelement <32 x i8> %0, i32 0
  %x1 = extractelement <32 x i8> %0, i32 1
  %x2 = extractelement <32 x i8> %0, i32 2
  %x3 = extractelement <32 x i8> %0, i32 3
  %x4 = extractelement <32 x i8> %0, i32 4
  %x5 = extractelement <32 x i8> %0, i32 5
  %x6 = extractelement <32 x i8> %0, i32 6
  %x7 = extractelement <32 x i8> %0, i32 7
  %x8 = extractelement <32 x i8> %0, i32 8
  %x9 = extractelement <32 x i8> %0, i32 9
  %x10 = extractelement <32 x i8> %0, i32 10
  %x11 = extractelement <32 x i8> %0, i32 11
  %x12 = extractelement <32 x i8> %0, i32 12
  %x13 = extractelement <32 x i8> %0, i32 13
  %x14 = extractelement <32 x i8> %0, i32 14
  %x15 = extractelement <32 x i8> %0, i32 15
  %x16 = extractelement <32 x i8> %0, i32 16
  %x17 = extractelement <32 x i8> %0, i32 17
  %x18 = extractelement <32 x i8> %0, i32 18
  %x19 = extractelement <32 x i8> %0, i32 19
  %x20 = extractelement <32 x i8> %0, i32 20
  %x21 = extractelement <32 x i8> %0, i32 21
  %x22 = extractelement <32 x i8> %0, i32 22
  %x23 = extractelement <32 x i8> %0, i32 23
  %x24 = extractelement <32 x i8> %0, i32 24
  %x25 = extractelement <32 x i8> %0, i32 25
  %x26 = extractelement <32 x i8> %0, i32 26
  %x27 = extractelement <32 x i8> %0, i32 27
  %x28 = extractelement <32 x i8> %0, i32 28
  %x29 = extractelement <32 x i8> %0, i32 29
  %x30 = extractelement <32 x i8> %0, i32 30
  %x31 = extractelement <32 x i8> %0, i32 31
  %trunc0 = trunc i8 %x0 to i4
  %trunc1 = trunc i8 %x1 to i4
  %trunc2 = trunc i8 %x2 to i4
  %trunc3 = trunc i8 %x3 to i4
  %trunc4 = trunc i8 %x4 to i4
  %trunc5 = trunc i8 %x5 to i4
  %trunc6 = trunc i8 %x6 to i4
  %trunc7 = trunc i8 %x7 to i4
  %trunc8 = trunc i8 %x8 to i4
  %trunc9 = trunc i8 %x9 to i4
  %trunc10 = trunc i8 %x10 to i4
  %trunc11 = trunc i8 %x11 to i4
  %trunc12 = trunc i8 %x12 to i4
  %trunc13 = trunc i8 %x13 to i4
  %trunc14 = trunc i8 %x14 to i4
  %trunc15 = trunc i8 %x15 to i4
  %trunc16 = trunc i8 %x16 to i4
  %trunc17 = trunc i8 %x17 to i4
  %trunc18 = trunc i8 %x18 to i4
  %trunc19 = trunc i8 %x19 to i4
  %trunc20 = trunc i8 %x20 to i4
  %trunc21 = trunc i8 %x21 to i4
  %trunc22 = trunc i8 %x22 to i4
  %trunc23 = trunc i8 %x23 to i4
  %trunc24 = trunc i8 %x24 to i4
  %trunc25 = trunc i8 %x25 to i4
  %trunc26 = trunc i8 %x26 to i4
  %trunc27 = trunc i8 %x27 to i4
  %trunc28 = trunc i8 %x28 to i4
  %trunc29 = trunc i8 %x29 to i4
  %trunc30 = trunc i8 %x30 to i4
  %trunc31 = trunc i8 %x31 to i4
  %ext0 = zext i4 %trunc0 to i8
  %ext1 = zext i4 %trunc1 to i8
  %ext2 = zext i4 %trunc2 to i8
  %ext3 = zext i4 %trunc3 to i8
  %ext4 = zext i4 %trunc4 to i8
  %ext5 = zext i4 %trunc5 to i8
  %ext6 = zext i4 %trunc6 to i8
  %ext7 = zext i4 %trunc7 to i8
  %ext8 = zext i4 %trunc8 to i8
  %ext9 = zext i4 %trunc9 to i8
  %ext10 = zext i4 %trunc10 to i8
  %ext11 = zext i4 %trunc11 to i8
  %ext12 = zext i4 %trunc12 to i8
  %ext13 = zext i4 %trunc13 to i8
  %ext14 = zext i4 %trunc14 to i8
  %ext15 = zext i4 %trunc15 to i8
  %ext16 = zext i4 %trunc16 to i8
  %ext17 = zext i4 %trunc17 to i8
  %ext18 = zext i4 %trunc18 to i8
  %ext19 = zext i4 %trunc19 to i8
  %ext20 = zext i4 %trunc20 to i8
  %ext21 = zext i4 %trunc21 to i8
  %ext22 = zext i4 %trunc22 to i8
  %ext23 = zext i4 %trunc23 to i8
  %ext24 = zext i4 %trunc24 to i8
  %ext25 = zext i4 %trunc25 to i8
  %ext26 = zext i4 %trunc26 to i8
  %ext27 = zext i4 %trunc27 to i8
  %ext28 = zext i4 %trunc28 to i8
  %ext29 = zext i4 %trunc29 to i8
  %ext30 = zext i4 %trunc30 to i8
  %ext31 = zext i4 %trunc31 to i8
  %v0 = insertelement <32 x i8> undef, i8 %ext0, i32 0
  %v1 = insertelement <32 x i8> %v0, i8 %ext1, i32 1
  %v2 = insertelement <32 x i8> %v1, i8 %ext2, i32 2
  %v3 = insertelement <32 x i8> %v2, i8 %ext3, i32 3
  %v4 = insertelement <32 x i8> %v3, i8 %ext4, i32 4
  %v5 = insertelement <32 x i8> %v4, i8 %ext5, i32 5
  %v6 = insertelement <32 x i8> %v5, i8 %ext6, i32 6
  %v7 = insertelement <32 x i8> %v6, i8 %ext7, i32 7
  %v8 = insertelement <32 x i8> %v7, i8 %ext8, i32 8
  %v9 = insertelement <32 x i8> %v8, i8 %ext9, i32 9
  %v10 = insertelement <32 x i8> %v9, i8 %ext10, i32 10
  %v11 = insertelement <32 x i8> %v10, i8 %ext11, i32 11
  %v12 = insertelement <32 x i8> %v11, i8 %ext12, i32 12
  %v13 = insertelement <32 x i8> %v12, i8 %ext13, i32 13
  %v14 = insertelement <32 x i8> %v13, i8 %ext14, i32 14
  %v15 = insertelement <32 x i8> %v14, i8 %ext15, i32 15
  %v16 = insertelement <32 x i8> %v15, i8 %ext16, i32 16
  %v17 = insertelement <32 x i8> %v16, i8 %ext17, i32 17
  %v18 = insertelement <32 x i8> %v17, i8 %ext18, i32 18
  %v19 = insertelement <32 x i8> %v18, i8 %ext19, i32 19
  %v20 = insertelement <32 x i8> %v19, i8 %ext20, i32 20
  %v21 = insertelement <32 x i8> %v20, i8 %ext21, i32 21
  %v22 = insertelement <32 x i8> %v21, i8 %ext22, i32 22
  %v23 = insertelement <32 x i8> %v22, i8 %ext23, i32 23
  %v24 = insertelement <32 x i8> %v23, i8 %ext24, i32 24
  %v25 = insertelement <32 x i8> %v24, i8 %ext25, i32 25
  %v26 = insertelement <32 x i8> %v25, i8 %ext26, i32 26
  %v27 = insertelement <32 x i8> %v26, i8 %ext27, i32 27
  %v28 = insertelement <32 x i8> %v27, i8 %ext28, i32 28
  %v29 = insertelement <32 x i8> %v28, i8 %ext29, i32 29
  %v30 = insertelement <32 x i8> %v29, i8 %ext30, i32 30
  %v31 = insertelement <32 x i8> %v30, i8 %ext31, i32 31
  ret <32 x i8> %v31
}

; The 'b' functions express the same clearing with a different IR shape: the
; input is bitcast to a vector with twice as many half-width elements, a zero
; is inserted at every odd index, and the result is bitcast back.

; <2 x i64> viewed as <4 x i32>; lanes 1 and 3 are overwritten with zero.
define <2 x i64> @_clearupper2xi64b(<2 x i64>) nounwind {
; SSE-LABEL: _clearupper2xi64b:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper2xi64b:
; AVX1: # BB#0:
; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper2xi64b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3]
; AVX2-NEXT: retq
  %x32 = bitcast <2 x i64> %0 to <4 x i32>
  %r0 = insertelement <4 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <4 x i32> %r0, i32 zeroinitializer, i32 3
  %r = bitcast <4 x i32> %r1 to <2 x i64>
  ret <2 x i64> %r
}

; <4 x i64> viewed as <8 x i32>; lanes 1, 3, 5 and 7 are overwritten with zero.
define <4 x i64> @_clearupper4xi64b(<4 x i64>) nounwind {
; SSE-LABEL: _clearupper4xi64b:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper4xi64b:
; AVX1: # BB#0:
; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1
; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper4xi64b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
; AVX2-NEXT: retq
  %x32 = bitcast <4 x i64> %0 to <8 x i32>
  %r0 = insertelement <8 x i32> %x32, i32 zeroinitializer, i32 1
  %r1 = insertelement <8 x i32> %r0, i32 zeroinitializer, i32 3
  %r2 = insertelement <8 x i32> %r1, i32 zeroinitializer, i32 5
  %r3 = insertelement <8 x i32> %r2, i32 zeroinitializer, i32 7
  %r = bitcast <8 x i32> %r3 to <4 x i64>
  ret <4 x i64> %r
}

; <4 x i32> viewed as <8 x i16>; odd lanes are overwritten with zero.
define <4 x i32> @_clearupper4xi32b(<4 x i32>) nounwind {
; SSE-LABEL: _clearupper4xi32b:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper4xi32b:
; AVX: # BB#0:
; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1
; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7]
; AVX-NEXT: retq
  %x16 = bitcast <4 x i32> %0 to <8 x i16>
  %r0 = insertelement <8 x i16> %x16, i16 zeroinitializer, i32 1
  %r1 = insertelement <8 x i16> %r0, i16 zeroinitializer, i32 3
  %r2 = insertelement <8 x i16> %r1, i16 zeroinitializer, i32 5
  %r3 = insertelement <8 x i16> %r2, i16 zeroinitializer, i32 7
  %r = bitcast <8 x i16> %r3 to <4 x i32>
  ret <4 x i32> %r
}

; <8 x i32> viewed as <16 x i16>; odd lanes are overwritten with zero.
define <8 x i32> @_clearupper8xi32b(<8 x i32>) nounwind {
; SSE-LABEL: _clearupper8xi32b:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper8xi32b:
; AVX1: # BB#0:
; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper8xi32b:
; AVX2: # BB#0:
; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1
; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15]
; AVX2-NEXT: retq
  %x16 = bitcast <8 x i32> %0 to <16 x i16>
  %r0 = insertelement <16 x i16> %x16, i16 zeroinitializer, i32 1
  %r1 = insertelement <16 x i16> %r0, i16 zeroinitializer, i32 3
  %r2 = insertelement <16 x i16> %r1, i16 zeroinitializer, i32 5
  %r3 = insertelement <16 x i16> %r2, i16 zeroinitializer, i32 7
  %r4 = insertelement <16 x i16> %r3, i16 zeroinitializer, i32 9
  %r5 = insertelement <16 x i16> %r4, i16 zeroinitializer, i32 11
  %r6 = insertelement <16 x i16> %r5, i16 zeroinitializer, i32 13
  %r7 = insertelement <16 x i16> %r6, i16 zeroinitializer, i32 15
  %r = bitcast <16 x i16> %r7 to <8 x i32>
  ret <8 x i32> %r
}

; <8 x i16> viewed as <16 x i8>; odd lanes are overwritten with zero.
define <8 x i16> @_clearupper8xi16b(<8 x i16>) nounwind {
; SSE-LABEL: _clearupper8xi16b:
; SSE: # BB#0:
; SSE-NEXT: andps {{.*}}(%rip), %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: _clearupper8xi16b:
; AVX: # BB#0:
; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0
; AVX-NEXT: retq
  %x8 = bitcast <8 x i16> %0 to <16 x i8>
  %r0 = insertelement <16 x i8> %x8, i8 zeroinitializer, i32 1
  %r1 = insertelement <16 x i8> %r0, i8 zeroinitializer, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 zeroinitializer, i32 5
  %r3 = insertelement <16 x i8> %r2, i8 zeroinitializer, i32 7
  %r4 = insertelement <16 x i8> %r3, i8 zeroinitializer, i32 9
  %r5 = insertelement <16 x i8> %r4, i8 zeroinitializer, i32 11
  %r6 = insertelement <16 x i8> %r5, i8 zeroinitializer, i32 13
  %r7 = insertelement <16 x i8> %r6, i8 zeroinitializer, i32 15
  %r = bitcast <16 x i8> %r7 to <8 x i16>
  ret <8 x i16> %r
}

; <16 x i16> viewed as <32 x i8>; odd lanes are overwritten with zero. The
; checks for both AVX runs record the 256-bit value being split into 128-bit
; halves, masked separately and re-joined, rather than a single 256-bit mask.
define <16 x i16> @_clearupper16xi16b(<16 x i16>) nounwind {
; SSE-LABEL: _clearupper16xi16b:
; SSE: # BB#0:
; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; SSE-NEXT: andps %xmm2, %xmm0
; SSE-NEXT: andps %xmm2, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: _clearupper16xi16b:
; AVX1: # BB#0:
; AVX1-NEXT: vmovaps {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm2
; AVX1-NEXT: vblendps {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT: vandps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm2, %ymm0
; AVX1-NEXT: retq
;
; AVX2-LABEL: _clearupper16xi16b:
; AVX2: # BB#0:
; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0]
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm2
; AVX2-NEXT: vpblendd {{.*#+}} ymm2 = ymm2[0,1,2,3],ymm0[4,5,6,7]
; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0
; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm2, %ymm0
; AVX2-NEXT: retq
  %x8 = bitcast <16 x i16> %0 to <32 x i8>
  %r0 = insertelement <32 x i8> %x8, i8 zeroinitializer, i32 1
  %r1 = insertelement <32 x i8> %r0, i8 zeroinitializer, i32 3
  %r2 = insertelement <32 x i8> %r1, i8 zeroinitializer, i32 5
  %r3 = insertelement <32 x i8> %r2, i8 zeroinitializer, i32 7
  %r4 = insertelement <32 x i8> %r3, i8 zeroinitializer, i32 9
  %r5 = insertelement <32 x i8> %r4, i8 zeroinitializer, i32 11
  %r6 = insertelement <32 x i8> %r5, i8 zeroinitializer, i32 13
  %r7 = insertelement <32 x i8> %r6, i8 zeroinitializer, i32 15
  %r8 = insertelement <32 x i8> %r7, i8 zeroinitializer, i32 17
  %r9 = insertelement <32 x i8> %r8, i8 zeroinitializer, i32 19
  %r10 = insertelement <32 x i8> %r9, i8 zeroinitializer, i32 21
  %r11 = insertelement <32 x i8> %r10, i8 zeroinitializer, i32 23
  %r12 = insertelement <32 x i8> %r11, i8 zeroinitializer, i32 25
  %r13 = insertelement <32 x i8> %r12, i8 zeroinitializer, i32 27
  %r14 = insertelement <32 x i8> %r13, i8 zeroinitializer, i32 29
  %r15 = insertelement <32 x i8> %r14, i8 zeroinitializer, i32 31
  %r = bitcast <32 x i8> %r15 to <16 x i16>
  ret <16 x i16> %r
}

define <16 x i8> @_clearupper16xi8b(<16 x i8>) nounwind {
; SSE-LABEL: _clearupper16xi8b:
; SSE: # BB#0:
; SSE-NEXT: pushq %r14
; SSE-NEXT: pushq %rbx
; SSE-NEXT: pshufd {{.*#+}} xmm1 = xmm0[2,3,0,1]
; SSE-NEXT: movq %xmm0, %rcx
; SSE-NEXT: movq %rcx, %r8
; SSE-NEXT: movq %rcx, %r9
; SSE-NEXT: movq %rcx, %r10
; SSE-NEXT: movq %rcx, %rax
; SSE-NEXT: movq %rcx, %rdx
; SSE-NEXT: movq %rcx, %rsi
; SSE-NEXT: movq %rcx, %rdi
; SSE-NEXT: andb $15, %cl
; SSE-NEXT: movb %cl, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %xmm1, %rcx
; SSE-NEXT: shrq $56, %rdi
; SSE-NEXT: andb $15, %dil
; SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %r11
; SSE-NEXT: shrq $48, %rsi
; SSE-NEXT: andb $15, %sil
; SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp)
; SSE-NEXT: movq %rcx, %r14
; SSE-NEXT: shrq $40, %rdx
835; SSE-NEXT: andb $15, %dl 836; SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp) 837; SSE-NEXT: movq %rcx, %rdx 838; SSE-NEXT: shrq $32, %rax 839; SSE-NEXT: andb $15, %al 840; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) 841; SSE-NEXT: movq %rcx, %rax 842; SSE-NEXT: shrq $24, %r10 843; SSE-NEXT: andb $15, %r10b 844; SSE-NEXT: movb %r10b, -{{[0-9]+}}(%rsp) 845; SSE-NEXT: movq %rcx, %rdi 846; SSE-NEXT: shrq $16, %r9 847; SSE-NEXT: andb $15, %r9b 848; SSE-NEXT: movb %r9b, -{{[0-9]+}}(%rsp) 849; SSE-NEXT: movq %rcx, %rsi 850; SSE-NEXT: shrq $8, %r8 851; SSE-NEXT: andb $15, %r8b 852; SSE-NEXT: movb %r8b, -{{[0-9]+}}(%rsp) 853; SSE-NEXT: movq %rcx, %rbx 854; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp) 855; SSE-NEXT: andb $15, %cl 856; SSE-NEXT: movb %cl, -{{[0-9]+}}(%rsp) 857; SSE-NEXT: shrq $56, %rbx 858; SSE-NEXT: andb $15, %bl 859; SSE-NEXT: movb %bl, -{{[0-9]+}}(%rsp) 860; SSE-NEXT: shrq $48, %rsi 861; SSE-NEXT: andb $15, %sil 862; SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp) 863; SSE-NEXT: shrq $40, %rdi 864; SSE-NEXT: andb $15, %dil 865; SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp) 866; SSE-NEXT: shrq $32, %rax 867; SSE-NEXT: andb $15, %al 868; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) 869; SSE-NEXT: shrq $24, %rdx 870; SSE-NEXT: andb $15, %dl 871; SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp) 872; SSE-NEXT: shrq $16, %r14 873; SSE-NEXT: andb $15, %r14b 874; SSE-NEXT: movb %r14b, -{{[0-9]+}}(%rsp) 875; SSE-NEXT: shrq $8, %r11 876; SSE-NEXT: andb $15, %r11b 877; SSE-NEXT: movb %r11b, -{{[0-9]+}}(%rsp) 878; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp) 879; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 880; SSE-NEXT: movq {{.*#+}} xmm1 = mem[0],zero 881; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0] 882; SSE-NEXT: popq %rbx 883; SSE-NEXT: popq %r14 884; SSE-NEXT: retq 885; 886; AVX-LABEL: _clearupper16xi8b: 887; AVX: # BB#0: 888; AVX-NEXT: pushq %rbp 889; AVX-NEXT: pushq %r15 890; AVX-NEXT: pushq %r14 891; AVX-NEXT: pushq %r13 892; AVX-NEXT: pushq %r12 893; AVX-NEXT: pushq %rbx 894; AVX-NEXT: vmovaps %xmm0, 
-{{[0-9]+}}(%rsp) 895; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rcx 896; AVX-NEXT: movq -{{[0-9]+}}(%rsp), %rdx 897; AVX-NEXT: movq %rcx, %r8 898; AVX-NEXT: movq %rcx, %r9 899; AVX-NEXT: movq %rcx, %r10 900; AVX-NEXT: movq %rcx, %r11 901; AVX-NEXT: movq %rcx, %r14 902; AVX-NEXT: movq %rcx, %r15 903; AVX-NEXT: movq %rdx, %r12 904; AVX-NEXT: movq %rdx, %r13 905; AVX-NEXT: movq %rdx, %rdi 906; AVX-NEXT: movq %rdx, %rax 907; AVX-NEXT: movq %rdx, %rsi 908; AVX-NEXT: movq %rdx, %rbx 909; AVX-NEXT: movq %rdx, %rbp 910; AVX-NEXT: andb $15, %dl 911; AVX-NEXT: movb %dl, -{{[0-9]+}}(%rsp) 912; AVX-NEXT: movq %rcx, %rdx 913; AVX-NEXT: andb $15, %cl 914; AVX-NEXT: movb %cl, -{{[0-9]+}}(%rsp) 915; AVX-NEXT: shrq $56, %rbp 916; AVX-NEXT: andb $15, %bpl 917; AVX-NEXT: movb %bpl, -{{[0-9]+}}(%rsp) 918; AVX-NEXT: shrq $48, %rbx 919; AVX-NEXT: andb $15, %bl 920; AVX-NEXT: movb %bl, -{{[0-9]+}}(%rsp) 921; AVX-NEXT: shrq $40, %rsi 922; AVX-NEXT: andb $15, %sil 923; AVX-NEXT: movb %sil, -{{[0-9]+}}(%rsp) 924; AVX-NEXT: shrq $32, %rax 925; AVX-NEXT: andb $15, %al 926; AVX-NEXT: movb %al, -{{[0-9]+}}(%rsp) 927; AVX-NEXT: shrq $24, %rdi 928; AVX-NEXT: andb $15, %dil 929; AVX-NEXT: movb %dil, -{{[0-9]+}}(%rsp) 930; AVX-NEXT: shrq $16, %r13 931; AVX-NEXT: andb $15, %r13b 932; AVX-NEXT: movb %r13b, -{{[0-9]+}}(%rsp) 933; AVX-NEXT: shrq $8, %r12 934; AVX-NEXT: andb $15, %r12b 935; AVX-NEXT: movb %r12b, -{{[0-9]+}}(%rsp) 936; AVX-NEXT: shrq $56, %rdx 937; AVX-NEXT: andb $15, %dl 938; AVX-NEXT: movb %dl, -{{[0-9]+}}(%rsp) 939; AVX-NEXT: shrq $48, %r15 940; AVX-NEXT: andb $15, %r15b 941; AVX-NEXT: movb %r15b, -{{[0-9]+}}(%rsp) 942; AVX-NEXT: shrq $40, %r14 943; AVX-NEXT: andb $15, %r14b 944; AVX-NEXT: movb %r14b, -{{[0-9]+}}(%rsp) 945; AVX-NEXT: shrq $32, %r11 946; AVX-NEXT: andb $15, %r11b 947; AVX-NEXT: movb %r11b, -{{[0-9]+}}(%rsp) 948; AVX-NEXT: shrq $24, %r10 949; AVX-NEXT: andb $15, %r10b 950; AVX-NEXT: movb %r10b, -{{[0-9]+}}(%rsp) 951; AVX-NEXT: shrq $16, %r9 952; AVX-NEXT: andb $15, %r9b 953; 
AVX-NEXT: movb %r9b, -{{[0-9]+}}(%rsp) 954; AVX-NEXT: shrq $8, %r8 955; AVX-NEXT: andb $15, %r8b 956; AVX-NEXT: movb %r8b, -{{[0-9]+}}(%rsp) 957; AVX-NEXT: movb $0, -{{[0-9]+}}(%rsp) 958; AVX-NEXT: vmovaps -{{[0-9]+}}(%rsp), %xmm0 959; AVX-NEXT: popq %rbx 960; AVX-NEXT: popq %r12 961; AVX-NEXT: popq %r13 962; AVX-NEXT: popq %r14 963; AVX-NEXT: popq %r15 964; AVX-NEXT: popq %rbp 965; AVX-NEXT: retq 966 %x4 = bitcast <16 x i8> %0 to <32 x i4> 967 %r0 = insertelement <32 x i4> %x4, i4 zeroinitializer, i32 1 968 %r1 = insertelement <32 x i4> %r0, i4 zeroinitializer, i32 3 969 %r2 = insertelement <32 x i4> %r1, i4 zeroinitializer, i32 5 970 %r3 = insertelement <32 x i4> %r2, i4 zeroinitializer, i32 7 971 %r4 = insertelement <32 x i4> %r3, i4 zeroinitializer, i32 9 972 %r5 = insertelement <32 x i4> %r4, i4 zeroinitializer, i32 11 973 %r6 = insertelement <32 x i4> %r5, i4 zeroinitializer, i32 13 974 %r7 = insertelement <32 x i4> %r6, i4 zeroinitializer, i32 15 975 %r8 = insertelement <32 x i4> %r7, i4 zeroinitializer, i32 17 976 %r9 = insertelement <32 x i4> %r8, i4 zeroinitializer, i32 19 977 %r10 = insertelement <32 x i4> %r9, i4 zeroinitializer, i32 21 978 %r11 = insertelement <32 x i4> %r10, i4 zeroinitializer, i32 23 979 %r12 = insertelement <32 x i4> %r11, i4 zeroinitializer, i32 25 980 %r13 = insertelement <32 x i4> %r12, i4 zeroinitializer, i32 27 981 %r14 = insertelement <32 x i4> %r13, i4 zeroinitializer, i32 29 982 %r15 = insertelement <32 x i4> %r14, i4 zeroinitializer, i32 31 983 %r = bitcast <32 x i4> %r15 to <16 x i8> 984 ret <16 x i8> %r 985} 986 987define <32 x i8> @_clearupper32xi8b(<32 x i8>) nounwind { 988; SSE-LABEL: _clearupper32xi8b: 989; SSE: # BB#0: 990; SSE-NEXT: pushq %r14 991; SSE-NEXT: pushq %rbx 992; SSE-NEXT: pshufd {{.*#+}} xmm2 = xmm0[2,3,0,1] 993; SSE-NEXT: movq %xmm0, %rcx 994; SSE-NEXT: movq %rcx, %r8 995; SSE-NEXT: movq %rcx, %r9 996; SSE-NEXT: movq %rcx, %r10 997; SSE-NEXT: movq %rcx, %rax 998; SSE-NEXT: movq %rcx, %rdx 999; SSE-NEXT: 
movq %rcx, %rsi 1000; SSE-NEXT: movq %rcx, %rdi 1001; SSE-NEXT: andb $15, %cl 1002; SSE-NEXT: movb %cl, -{{[0-9]+}}(%rsp) 1003; SSE-NEXT: movq %xmm2, %rcx 1004; SSE-NEXT: shrq $56, %rdi 1005; SSE-NEXT: andb $15, %dil 1006; SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp) 1007; SSE-NEXT: movq %rcx, %r11 1008; SSE-NEXT: shrq $48, %rsi 1009; SSE-NEXT: andb $15, %sil 1010; SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp) 1011; SSE-NEXT: movq %rcx, %r14 1012; SSE-NEXT: shrq $40, %rdx 1013; SSE-NEXT: andb $15, %dl 1014; SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp) 1015; SSE-NEXT: movq %rcx, %rdx 1016; SSE-NEXT: shrq $32, %rax 1017; SSE-NEXT: andb $15, %al 1018; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) 1019; SSE-NEXT: movq %rcx, %rax 1020; SSE-NEXT: shrq $24, %r10 1021; SSE-NEXT: andb $15, %r10b 1022; SSE-NEXT: movb %r10b, -{{[0-9]+}}(%rsp) 1023; SSE-NEXT: movq %rcx, %rdi 1024; SSE-NEXT: shrq $16, %r9 1025; SSE-NEXT: andb $15, %r9b 1026; SSE-NEXT: movb %r9b, -{{[0-9]+}}(%rsp) 1027; SSE-NEXT: movq %rcx, %rsi 1028; SSE-NEXT: shrq $8, %r8 1029; SSE-NEXT: andb $15, %r8b 1030; SSE-NEXT: movb %r8b, -{{[0-9]+}}(%rsp) 1031; SSE-NEXT: movq %rcx, %rbx 1032; SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp) 1033; SSE-NEXT: andb $15, %cl 1034; SSE-NEXT: movb %cl, -{{[0-9]+}}(%rsp) 1035; SSE-NEXT: shrq $56, %rbx 1036; SSE-NEXT: andb $15, %bl 1037; SSE-NEXT: movb %bl, -{{[0-9]+}}(%rsp) 1038; SSE-NEXT: shrq $48, %rsi 1039; SSE-NEXT: andb $15, %sil 1040; SSE-NEXT: movb %sil, -{{[0-9]+}}(%rsp) 1041; SSE-NEXT: shrq $40, %rdi 1042; SSE-NEXT: andb $15, %dil 1043; SSE-NEXT: movb %dil, -{{[0-9]+}}(%rsp) 1044; SSE-NEXT: shrq $32, %rax 1045; SSE-NEXT: andb $15, %al 1046; SSE-NEXT: movb %al, -{{[0-9]+}}(%rsp) 1047; SSE-NEXT: shrq $24, %rdx 1048; SSE-NEXT: andb $15, %dl 1049; SSE-NEXT: movb %dl, -{{[0-9]+}}(%rsp) 1050; SSE-NEXT: shrq $16, %r14 1051; SSE-NEXT: andb $15, %r14b 1052; SSE-NEXT: movb %r14b, -{{[0-9]+}}(%rsp) 1053; SSE-NEXT: shrq $8, %r11 1054; SSE-NEXT: andb $15, %r11b 1055; SSE-NEXT: movb %r11b, -{{[0-9]+}}(%rsp) 1056; 
SSE-NEXT: movb $0, -{{[0-9]+}}(%rsp) 1057; SSE-NEXT: movq {{.*#+}} xmm0 = mem[0],zero 1058; SSE-NEXT: movq {{.*#+}} xmm2 = mem[0],zero 1059; SSE-NEXT: punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm2[0] 1060; SSE-NEXT: popq %rbx 1061; SSE-NEXT: popq %r14 1062; SSE-NEXT: retq 1063; 1064; AVX1-LABEL: _clearupper32xi8b: 1065; AVX1: # BB#0: 1066; AVX1-NEXT: pushq %rbp 1067; AVX1-NEXT: pushq %r15 1068; AVX1-NEXT: pushq %r14 1069; AVX1-NEXT: pushq %r13 1070; AVX1-NEXT: pushq %r12 1071; AVX1-NEXT: pushq %rbx 1072; AVX1-NEXT: vmovq %xmm0, %rcx 1073; AVX1-NEXT: movq %rcx, %r8 1074; AVX1-NEXT: movq %rcx, %r9 1075; AVX1-NEXT: movq %rcx, %r10 1076; AVX1-NEXT: movq %rcx, %r11 1077; AVX1-NEXT: movq %rcx, %r14 1078; AVX1-NEXT: movq %rcx, %r15 1079; AVX1-NEXT: vpextrq $1, %xmm0, %rdx 1080; AVX1-NEXT: movq %rdx, %r12 1081; AVX1-NEXT: movq %rdx, %r13 1082; AVX1-NEXT: movq %rdx, %rbx 1083; AVX1-NEXT: movq %rdx, %rax 1084; AVX1-NEXT: movq %rdx, %rdi 1085; AVX1-NEXT: movq %rdx, %rsi 1086; AVX1-NEXT: movq %rdx, %rbp 1087; AVX1-NEXT: andb $15, %dl 1088; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp) 1089; AVX1-NEXT: movq %rcx, %rdx 1090; AVX1-NEXT: andb $15, %cl 1091; AVX1-NEXT: movb %cl, -{{[0-9]+}}(%rsp) 1092; AVX1-NEXT: shrq $56, %rbp 1093; AVX1-NEXT: andb $15, %bpl 1094; AVX1-NEXT: movb %bpl, -{{[0-9]+}}(%rsp) 1095; AVX1-NEXT: shrq $48, %rsi 1096; AVX1-NEXT: andb $15, %sil 1097; AVX1-NEXT: movb %sil, -{{[0-9]+}}(%rsp) 1098; AVX1-NEXT: shrq $40, %rdi 1099; AVX1-NEXT: andb $15, %dil 1100; AVX1-NEXT: movb %dil, -{{[0-9]+}}(%rsp) 1101; AVX1-NEXT: shrq $32, %rax 1102; AVX1-NEXT: andb $15, %al 1103; AVX1-NEXT: movb %al, -{{[0-9]+}}(%rsp) 1104; AVX1-NEXT: shrq $24, %rbx 1105; AVX1-NEXT: andb $15, %bl 1106; AVX1-NEXT: movb %bl, -{{[0-9]+}}(%rsp) 1107; AVX1-NEXT: shrq $16, %r13 1108; AVX1-NEXT: andb $15, %r13b 1109; AVX1-NEXT: movb %r13b, -{{[0-9]+}}(%rsp) 1110; AVX1-NEXT: shrq $8, %r12 1111; AVX1-NEXT: andb $15, %r12b 1112; AVX1-NEXT: movb %r12b, -{{[0-9]+}}(%rsp) 1113; AVX1-NEXT: shrq $8, %r8 1114; 
AVX1-NEXT: shrq $16, %r9 1115; AVX1-NEXT: shrq $24, %r10 1116; AVX1-NEXT: shrq $32, %r11 1117; AVX1-NEXT: shrq $40, %r14 1118; AVX1-NEXT: shrq $48, %r15 1119; AVX1-NEXT: shrq $56, %rdx 1120; AVX1-NEXT: andb $15, %dl 1121; AVX1-NEXT: movb %dl, -{{[0-9]+}}(%rsp) 1122; AVX1-NEXT: andb $15, %r15b 1123; AVX1-NEXT: movb %r15b, -{{[0-9]+}}(%rsp) 1124; AVX1-NEXT: andb $15, %r14b 1125; AVX1-NEXT: movb %r14b, -{{[0-9]+}}(%rsp) 1126; AVX1-NEXT: andb $15, %r11b 1127; AVX1-NEXT: movb %r11b, -{{[0-9]+}}(%rsp) 1128; AVX1-NEXT: andb $15, %r10b 1129; AVX1-NEXT: movb %r10b, -{{[0-9]+}}(%rsp) 1130; AVX1-NEXT: andb $15, %r9b 1131; AVX1-NEXT: movb %r9b, -{{[0-9]+}}(%rsp) 1132; AVX1-NEXT: andb $15, %r8b 1133; AVX1-NEXT: movb %r8b, -{{[0-9]+}}(%rsp) 1134; AVX1-NEXT: vextractf128 $1, %ymm0, %xmm0 1135; AVX1-NEXT: vmovq %xmm0, %rax 1136; AVX1-NEXT: movq %rax, %rcx 1137; AVX1-NEXT: movq %rax, %rdx 1138; AVX1-NEXT: movq %rax, %rsi 1139; AVX1-NEXT: movq %rax, %rdi 1140; AVX1-NEXT: movl %eax, %ebp 1141; AVX1-NEXT: movl %eax, %ebx 1142; AVX1-NEXT: vmovd %eax, %xmm1 1143; AVX1-NEXT: shrl $8, %eax 1144; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1145; AVX1-NEXT: shrl $16, %ebx 1146; AVX1-NEXT: vpinsrb $2, %ebx, %xmm1, %xmm1 1147; AVX1-NEXT: shrl $24, %ebp 1148; AVX1-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1 1149; AVX1-NEXT: shrq $32, %rdi 1150; AVX1-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1 1151; AVX1-NEXT: shrq $40, %rsi 1152; AVX1-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1 1153; AVX1-NEXT: movb $0, -{{[0-9]+}}(%rsp) 1154; AVX1-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm2 1155; AVX1-NEXT: shrq $48, %rdx 1156; AVX1-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1 1157; AVX1-NEXT: vpextrq $1, %xmm0, %rax 1158; AVX1-NEXT: shrq $56, %rcx 1159; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0 1160; AVX1-NEXT: movl %eax, %ecx 1161; AVX1-NEXT: shrl $8, %ecx 1162; AVX1-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 1163; AVX1-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 1164; AVX1-NEXT: movl %eax, %ecx 1165; AVX1-NEXT: shrl $16, %ecx 1166; AVX1-NEXT: vpinsrb 
$10, %ecx, %xmm0, %xmm0 1167; AVX1-NEXT: movl %eax, %ecx 1168; AVX1-NEXT: shrl $24, %ecx 1169; AVX1-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 1170; AVX1-NEXT: movq %rax, %rcx 1171; AVX1-NEXT: shrq $32, %rcx 1172; AVX1-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 1173; AVX1-NEXT: movq %rax, %rcx 1174; AVX1-NEXT: shrq $40, %rcx 1175; AVX1-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 1176; AVX1-NEXT: movq %rax, %rcx 1177; AVX1-NEXT: shrq $48, %rcx 1178; AVX1-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 1179; AVX1-NEXT: vmovq %xmm2, %rcx 1180; AVX1-NEXT: shrq $56, %rax 1181; AVX1-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 1182; AVX1-NEXT: movl %ecx, %eax 1183; AVX1-NEXT: shrl $8, %eax 1184; AVX1-NEXT: vmovd %ecx, %xmm1 1185; AVX1-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1186; AVX1-NEXT: movl %ecx, %eax 1187; AVX1-NEXT: shrl $16, %eax 1188; AVX1-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 1189; AVX1-NEXT: movl %ecx, %eax 1190; AVX1-NEXT: shrl $24, %eax 1191; AVX1-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 1192; AVX1-NEXT: movq %rcx, %rax 1193; AVX1-NEXT: shrq $32, %rax 1194; AVX1-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 1195; AVX1-NEXT: movq %rcx, %rax 1196; AVX1-NEXT: shrq $40, %rax 1197; AVX1-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 1198; AVX1-NEXT: movq %rcx, %rax 1199; AVX1-NEXT: shrq $48, %rax 1200; AVX1-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 1201; AVX1-NEXT: vpextrq $1, %xmm2, %rax 1202; AVX1-NEXT: shrq $56, %rcx 1203; AVX1-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 1204; AVX1-NEXT: movl %eax, %ecx 1205; AVX1-NEXT: shrl $8, %ecx 1206; AVX1-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 1207; AVX1-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 1208; AVX1-NEXT: movl %eax, %ecx 1209; AVX1-NEXT: shrl $16, %ecx 1210; AVX1-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 1211; AVX1-NEXT: movl %eax, %ecx 1212; AVX1-NEXT: shrl $24, %ecx 1213; AVX1-NEXT: vpinsrb $11, %ecx, %xmm1, %xmm1 1214; AVX1-NEXT: movq %rax, %rcx 1215; AVX1-NEXT: shrq $32, %rcx 1216; AVX1-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 1217; AVX1-NEXT: movq %rax, %rcx 1218; AVX1-NEXT: shrq $40, %rcx 
1219; AVX1-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 1220; AVX1-NEXT: movq %rax, %rcx 1221; AVX1-NEXT: shrq $48, %rcx 1222; AVX1-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 1223; AVX1-NEXT: shrq $56, %rax 1224; AVX1-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 1225; AVX1-NEXT: vinsertf128 $1, %xmm0, %ymm1, %ymm0 1226; AVX1-NEXT: popq %rbx 1227; AVX1-NEXT: popq %r12 1228; AVX1-NEXT: popq %r13 1229; AVX1-NEXT: popq %r14 1230; AVX1-NEXT: popq %r15 1231; AVX1-NEXT: popq %rbp 1232; AVX1-NEXT: retq 1233; 1234; AVX2-LABEL: _clearupper32xi8b: 1235; AVX2: # BB#0: 1236; AVX2-NEXT: pushq %rbp 1237; AVX2-NEXT: pushq %r15 1238; AVX2-NEXT: pushq %r14 1239; AVX2-NEXT: pushq %r13 1240; AVX2-NEXT: pushq %r12 1241; AVX2-NEXT: pushq %rbx 1242; AVX2-NEXT: vmovq %xmm0, %rcx 1243; AVX2-NEXT: movq %rcx, %r8 1244; AVX2-NEXT: movq %rcx, %r9 1245; AVX2-NEXT: movq %rcx, %r10 1246; AVX2-NEXT: movq %rcx, %r11 1247; AVX2-NEXT: movq %rcx, %r14 1248; AVX2-NEXT: movq %rcx, %r15 1249; AVX2-NEXT: vpextrq $1, %xmm0, %rdx 1250; AVX2-NEXT: movq %rdx, %r12 1251; AVX2-NEXT: movq %rdx, %r13 1252; AVX2-NEXT: movq %rdx, %rbx 1253; AVX2-NEXT: movq %rdx, %rax 1254; AVX2-NEXT: movq %rdx, %rdi 1255; AVX2-NEXT: movq %rdx, %rsi 1256; AVX2-NEXT: movq %rdx, %rbp 1257; AVX2-NEXT: andb $15, %dl 1258; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp) 1259; AVX2-NEXT: movq %rcx, %rdx 1260; AVX2-NEXT: andb $15, %cl 1261; AVX2-NEXT: movb %cl, -{{[0-9]+}}(%rsp) 1262; AVX2-NEXT: shrq $56, %rbp 1263; AVX2-NEXT: andb $15, %bpl 1264; AVX2-NEXT: movb %bpl, -{{[0-9]+}}(%rsp) 1265; AVX2-NEXT: shrq $48, %rsi 1266; AVX2-NEXT: andb $15, %sil 1267; AVX2-NEXT: movb %sil, -{{[0-9]+}}(%rsp) 1268; AVX2-NEXT: shrq $40, %rdi 1269; AVX2-NEXT: andb $15, %dil 1270; AVX2-NEXT: movb %dil, -{{[0-9]+}}(%rsp) 1271; AVX2-NEXT: shrq $32, %rax 1272; AVX2-NEXT: andb $15, %al 1273; AVX2-NEXT: movb %al, -{{[0-9]+}}(%rsp) 1274; AVX2-NEXT: shrq $24, %rbx 1275; AVX2-NEXT: andb $15, %bl 1276; AVX2-NEXT: movb %bl, -{{[0-9]+}}(%rsp) 1277; AVX2-NEXT: shrq $16, %r13 1278; AVX2-NEXT: andb 
$15, %r13b 1279; AVX2-NEXT: movb %r13b, -{{[0-9]+}}(%rsp) 1280; AVX2-NEXT: shrq $8, %r12 1281; AVX2-NEXT: andb $15, %r12b 1282; AVX2-NEXT: movb %r12b, -{{[0-9]+}}(%rsp) 1283; AVX2-NEXT: shrq $8, %r8 1284; AVX2-NEXT: shrq $16, %r9 1285; AVX2-NEXT: shrq $24, %r10 1286; AVX2-NEXT: shrq $32, %r11 1287; AVX2-NEXT: shrq $40, %r14 1288; AVX2-NEXT: shrq $48, %r15 1289; AVX2-NEXT: shrq $56, %rdx 1290; AVX2-NEXT: andb $15, %dl 1291; AVX2-NEXT: movb %dl, -{{[0-9]+}}(%rsp) 1292; AVX2-NEXT: andb $15, %r15b 1293; AVX2-NEXT: movb %r15b, -{{[0-9]+}}(%rsp) 1294; AVX2-NEXT: andb $15, %r14b 1295; AVX2-NEXT: movb %r14b, -{{[0-9]+}}(%rsp) 1296; AVX2-NEXT: andb $15, %r11b 1297; AVX2-NEXT: movb %r11b, -{{[0-9]+}}(%rsp) 1298; AVX2-NEXT: andb $15, %r10b 1299; AVX2-NEXT: movb %r10b, -{{[0-9]+}}(%rsp) 1300; AVX2-NEXT: andb $15, %r9b 1301; AVX2-NEXT: movb %r9b, -{{[0-9]+}}(%rsp) 1302; AVX2-NEXT: andb $15, %r8b 1303; AVX2-NEXT: movb %r8b, -{{[0-9]+}}(%rsp) 1304; AVX2-NEXT: vextracti128 $1, %ymm0, %xmm0 1305; AVX2-NEXT: vmovq %xmm0, %rax 1306; AVX2-NEXT: movq %rax, %rcx 1307; AVX2-NEXT: movq %rax, %rdx 1308; AVX2-NEXT: movq %rax, %rsi 1309; AVX2-NEXT: movq %rax, %rdi 1310; AVX2-NEXT: movl %eax, %ebp 1311; AVX2-NEXT: movl %eax, %ebx 1312; AVX2-NEXT: vmovd %eax, %xmm1 1313; AVX2-NEXT: shrl $8, %eax 1314; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1315; AVX2-NEXT: shrl $16, %ebx 1316; AVX2-NEXT: vpinsrb $2, %ebx, %xmm1, %xmm1 1317; AVX2-NEXT: shrl $24, %ebp 1318; AVX2-NEXT: vpinsrb $3, %ebp, %xmm1, %xmm1 1319; AVX2-NEXT: shrq $32, %rdi 1320; AVX2-NEXT: vpinsrb $4, %edi, %xmm1, %xmm1 1321; AVX2-NEXT: shrq $40, %rsi 1322; AVX2-NEXT: vpinsrb $5, %esi, %xmm1, %xmm1 1323; AVX2-NEXT: movb $0, -{{[0-9]+}}(%rsp) 1324; AVX2-NEXT: vmovdqa -{{[0-9]+}}(%rsp), %xmm2 1325; AVX2-NEXT: shrq $48, %rdx 1326; AVX2-NEXT: vpinsrb $6, %edx, %xmm1, %xmm1 1327; AVX2-NEXT: vpextrq $1, %xmm0, %rax 1328; AVX2-NEXT: shrq $56, %rcx 1329; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm0 1330; AVX2-NEXT: movl %eax, %ecx 1331; 
AVX2-NEXT: shrl $8, %ecx 1332; AVX2-NEXT: vpinsrb $8, %eax, %xmm0, %xmm0 1333; AVX2-NEXT: vpinsrb $9, %ecx, %xmm0, %xmm0 1334; AVX2-NEXT: movl %eax, %ecx 1335; AVX2-NEXT: shrl $16, %ecx 1336; AVX2-NEXT: vpinsrb $10, %ecx, %xmm0, %xmm0 1337; AVX2-NEXT: movl %eax, %ecx 1338; AVX2-NEXT: shrl $24, %ecx 1339; AVX2-NEXT: vpinsrb $11, %ecx, %xmm0, %xmm0 1340; AVX2-NEXT: movq %rax, %rcx 1341; AVX2-NEXT: shrq $32, %rcx 1342; AVX2-NEXT: vpinsrb $12, %ecx, %xmm0, %xmm0 1343; AVX2-NEXT: movq %rax, %rcx 1344; AVX2-NEXT: shrq $40, %rcx 1345; AVX2-NEXT: vpinsrb $13, %ecx, %xmm0, %xmm0 1346; AVX2-NEXT: movq %rax, %rcx 1347; AVX2-NEXT: shrq $48, %rcx 1348; AVX2-NEXT: vpinsrb $14, %ecx, %xmm0, %xmm0 1349; AVX2-NEXT: vmovq %xmm2, %rcx 1350; AVX2-NEXT: shrq $56, %rax 1351; AVX2-NEXT: vpinsrb $15, %eax, %xmm0, %xmm0 1352; AVX2-NEXT: movl %ecx, %eax 1353; AVX2-NEXT: shrl $8, %eax 1354; AVX2-NEXT: vmovd %ecx, %xmm1 1355; AVX2-NEXT: vpinsrb $1, %eax, %xmm1, %xmm1 1356; AVX2-NEXT: movl %ecx, %eax 1357; AVX2-NEXT: shrl $16, %eax 1358; AVX2-NEXT: vpinsrb $2, %eax, %xmm1, %xmm1 1359; AVX2-NEXT: movl %ecx, %eax 1360; AVX2-NEXT: shrl $24, %eax 1361; AVX2-NEXT: vpinsrb $3, %eax, %xmm1, %xmm1 1362; AVX2-NEXT: movq %rcx, %rax 1363; AVX2-NEXT: shrq $32, %rax 1364; AVX2-NEXT: vpinsrb $4, %eax, %xmm1, %xmm1 1365; AVX2-NEXT: movq %rcx, %rax 1366; AVX2-NEXT: shrq $40, %rax 1367; AVX2-NEXT: vpinsrb $5, %eax, %xmm1, %xmm1 1368; AVX2-NEXT: movq %rcx, %rax 1369; AVX2-NEXT: shrq $48, %rax 1370; AVX2-NEXT: vpinsrb $6, %eax, %xmm1, %xmm1 1371; AVX2-NEXT: vpextrq $1, %xmm2, %rax 1372; AVX2-NEXT: shrq $56, %rcx 1373; AVX2-NEXT: vpinsrb $7, %ecx, %xmm1, %xmm1 1374; AVX2-NEXT: movl %eax, %ecx 1375; AVX2-NEXT: shrl $8, %ecx 1376; AVX2-NEXT: vpinsrb $8, %eax, %xmm1, %xmm1 1377; AVX2-NEXT: vpinsrb $9, %ecx, %xmm1, %xmm1 1378; AVX2-NEXT: movl %eax, %ecx 1379; AVX2-NEXT: shrl $16, %ecx 1380; AVX2-NEXT: vpinsrb $10, %ecx, %xmm1, %xmm1 1381; AVX2-NEXT: movl %eax, %ecx 1382; AVX2-NEXT: shrl $24, %ecx 1383; AVX2-NEXT: 
vpinsrb $11, %ecx, %xmm1, %xmm1 1384; AVX2-NEXT: movq %rax, %rcx 1385; AVX2-NEXT: shrq $32, %rcx 1386; AVX2-NEXT: vpinsrb $12, %ecx, %xmm1, %xmm1 1387; AVX2-NEXT: movq %rax, %rcx 1388; AVX2-NEXT: shrq $40, %rcx 1389; AVX2-NEXT: vpinsrb $13, %ecx, %xmm1, %xmm1 1390; AVX2-NEXT: movq %rax, %rcx 1391; AVX2-NEXT: shrq $48, %rcx 1392; AVX2-NEXT: vpinsrb $14, %ecx, %xmm1, %xmm1 1393; AVX2-NEXT: shrq $56, %rax 1394; AVX2-NEXT: vpinsrb $15, %eax, %xmm1, %xmm1 1395; AVX2-NEXT: vinserti128 $1, %xmm0, %ymm1, %ymm0 1396; AVX2-NEXT: popq %rbx 1397; AVX2-NEXT: popq %r12 1398; AVX2-NEXT: popq %r13 1399; AVX2-NEXT: popq %r14 1400; AVX2-NEXT: popq %r15 1401; AVX2-NEXT: popq %rbp 1402; AVX2-NEXT: retq 1403 %x4 = bitcast <32 x i8> %0 to <64 x i4> 1404 %r0 = insertelement <64 x i4> %x4, i4 zeroinitializer, i32 1 1405 %r1 = insertelement <64 x i4> %r0, i4 zeroinitializer, i32 3 1406 %r2 = insertelement <64 x i4> %r1, i4 zeroinitializer, i32 5 1407 %r3 = insertelement <64 x i4> %r2, i4 zeroinitializer, i32 7 1408 %r4 = insertelement <64 x i4> %r3, i4 zeroinitializer, i32 9 1409 %r5 = insertelement <64 x i4> %r4, i4 zeroinitializer, i32 11 1410 %r6 = insertelement <64 x i4> %r5, i4 zeroinitializer, i32 13 1411 %r7 = insertelement <64 x i4> %r6, i4 zeroinitializer, i32 15 1412 %r8 = insertelement <64 x i4> %r7, i4 zeroinitializer, i32 17 1413 %r9 = insertelement <64 x i4> %r8, i4 zeroinitializer, i32 19 1414 %r10 = insertelement <64 x i4> %r9, i4 zeroinitializer, i32 21 1415 %r11 = insertelement <64 x i4> %r10, i4 zeroinitializer, i32 23 1416 %r12 = insertelement <64 x i4> %r11, i4 zeroinitializer, i32 25 1417 %r13 = insertelement <64 x i4> %r12, i4 zeroinitializer, i32 27 1418 %r14 = insertelement <64 x i4> %r13, i4 zeroinitializer, i32 29 1419 %r15 = insertelement <64 x i4> %r14, i4 zeroinitializer, i32 31 1420 %r16 = insertelement <64 x i4> %r15, i4 zeroinitializer, i32 33 1421 %r17 = insertelement <64 x i4> %r16, i4 zeroinitializer, i32 35 1422 %r18 = insertelement <64 x i4> %r17, i4 
zeroinitializer, i32 37 1423 %r19 = insertelement <64 x i4> %r18, i4 zeroinitializer, i32 39 1424 %r20 = insertelement <64 x i4> %r19, i4 zeroinitializer, i32 41 1425 %r21 = insertelement <64 x i4> %r20, i4 zeroinitializer, i32 43 1426 %r22 = insertelement <64 x i4> %r21, i4 zeroinitializer, i32 45 1427 %r23 = insertelement <64 x i4> %r22, i4 zeroinitializer, i32 47 1428 %r24 = insertelement <64 x i4> %r23, i4 zeroinitializer, i32 49 1429 %r25 = insertelement <64 x i4> %r24, i4 zeroinitializer, i32 51 1430 %r26 = insertelement <64 x i4> %r25, i4 zeroinitializer, i32 53 1431 %r27 = insertelement <64 x i4> %r26, i4 zeroinitializer, i32 55 1432 %r28 = insertelement <64 x i4> %r27, i4 zeroinitializer, i32 57 1433 %r29 = insertelement <64 x i4> %r28, i4 zeroinitializer, i32 59 1434 %r30 = insertelement <64 x i4> %r29, i4 zeroinitializer, i32 61 1435 %r31 = insertelement <64 x i4> %r30, i4 zeroinitializer, i32 63 1436 %r = bitcast <64 x i4> %r15 to <32 x i8> 1437 ret <32 x i8> %r 1438} 1439 1440define <2 x i64> @_clearupper2xi64c(<2 x i64>) nounwind { 1441; SSE-LABEL: _clearupper2xi64c: 1442; SSE: # BB#0: 1443; SSE-NEXT: andps {{.*}}(%rip), %xmm0 1444; SSE-NEXT: retq 1445; 1446; AVX1-LABEL: _clearupper2xi64c: 1447; AVX1: # BB#0: 1448; AVX1-NEXT: vpxor %xmm1, %xmm1, %xmm1 1449; AVX1-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3],xmm0[4,5],xmm1[6,7] 1450; AVX1-NEXT: retq 1451; 1452; AVX2-LABEL: _clearupper2xi64c: 1453; AVX2: # BB#0: 1454; AVX2-NEXT: vpxor %xmm1, %xmm1, %xmm1 1455; AVX2-NEXT: vpblendd {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3] 1456; AVX2-NEXT: retq 1457 %r = and <2 x i64> <i64 4294967295, i64 4294967295>, %0 1458 ret <2 x i64> %r 1459} 1460 1461define <4 x i64> @_clearupper4xi64c(<4 x i64>) nounwind { 1462; SSE-LABEL: _clearupper4xi64c: 1463; SSE: # BB#0: 1464; SSE-NEXT: movaps {{.*#+}} xmm2 = [4294967295,0,4294967295,0] 1465; SSE-NEXT: andps %xmm2, %xmm0 1466; SSE-NEXT: andps %xmm2, %xmm1 1467; SSE-NEXT: retq 1468; 1469; AVX1-LABEL: 
_clearupper4xi64c: 1470; AVX1: # BB#0: 1471; AVX1-NEXT: vxorps %ymm1, %ymm1, %ymm1 1472; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1473; AVX1-NEXT: retq 1474; 1475; AVX2-LABEL: _clearupper4xi64c: 1476; AVX2: # BB#0: 1477; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1 1478; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] 1479; AVX2-NEXT: retq 1480 %r = and <4 x i64> <i64 4294967295, i64 4294967295, i64 4294967295, i64 4294967295>, %0 1481 ret <4 x i64> %r 1482} 1483 1484define <4 x i32> @_clearupper4xi32c(<4 x i32>) nounwind { 1485; SSE-LABEL: _clearupper4xi32c: 1486; SSE: # BB#0: 1487; SSE-NEXT: andps {{.*}}(%rip), %xmm0 1488; SSE-NEXT: retq 1489; 1490; AVX-LABEL: _clearupper4xi32c: 1491; AVX: # BB#0: 1492; AVX-NEXT: vpxor %xmm1, %xmm1, %xmm1 1493; AVX-NEXT: vpblendw {{.*#+}} xmm0 = xmm0[0],xmm1[1],xmm0[2],xmm1[3],xmm0[4],xmm1[5],xmm0[6],xmm1[7] 1494; AVX-NEXT: retq 1495 %r = and <4 x i32> <i32 65535, i32 65535, i32 65535, i32 65535>, %0 1496 ret <4 x i32> %r 1497} 1498 1499define <8 x i32> @_clearupper8xi32c(<8 x i32>) nounwind { 1500; SSE-LABEL: _clearupper8xi32c: 1501; SSE: # BB#0: 1502; SSE-NEXT: movaps {{.*#+}} xmm2 = [65535,0,65535,0,65535,0,65535,0] 1503; SSE-NEXT: andps %xmm2, %xmm0 1504; SSE-NEXT: andps %xmm2, %xmm1 1505; SSE-NEXT: retq 1506; 1507; AVX1-LABEL: _clearupper8xi32c: 1508; AVX1: # BB#0: 1509; AVX1-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 1510; AVX1-NEXT: retq 1511; 1512; AVX2-LABEL: _clearupper8xi32c: 1513; AVX2: # BB#0: 1514; AVX2-NEXT: vpxor %ymm1, %ymm1, %ymm1 1515; AVX2-NEXT: vpblendw {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7],ymm0[8],ymm1[9],ymm0[10],ymm1[11],ymm0[12],ymm1[13],ymm0[14],ymm1[15] 1516; AVX2-NEXT: retq 1517 %r = and <8 x i32> <i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535, i32 65535>, %0 1518 ret <8 x i32> %r 1519} 1520 1521define <8 x i16> @_clearupper8xi16c(<8 
x i16>) nounwind { 1522; SSE-LABEL: _clearupper8xi16c: 1523; SSE: # BB#0: 1524; SSE-NEXT: andps {{.*}}(%rip), %xmm0 1525; SSE-NEXT: retq 1526; 1527; AVX-LABEL: _clearupper8xi16c: 1528; AVX: # BB#0: 1529; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 1530; AVX-NEXT: retq 1531 %r = and <8 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0 1532 ret <8 x i16> %r 1533} 1534 1535define <16 x i16> @_clearupper16xi16c(<16 x i16>) nounwind { 1536; SSE-LABEL: _clearupper16xi16c: 1537; SSE: # BB#0: 1538; SSE-NEXT: movaps {{.*#+}} xmm2 = [255,0,255,0,255,0,255,0,255,0,255,0,255,0,255,0] 1539; SSE-NEXT: andps %xmm2, %xmm0 1540; SSE-NEXT: andps %xmm2, %xmm1 1541; SSE-NEXT: retq 1542; 1543; AVX-LABEL: _clearupper16xi16c: 1544; AVX: # BB#0: 1545; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 1546; AVX-NEXT: retq 1547 %r = and <16 x i16> <i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255, i16 255>, %0 1548 ret <16 x i16> %r 1549} 1550 1551define <16 x i8> @_clearupper16xi8c(<16 x i8>) nounwind { 1552; SSE-LABEL: _clearupper16xi8c: 1553; SSE: # BB#0: 1554; SSE-NEXT: andps {{.*}}(%rip), %xmm0 1555; SSE-NEXT: retq 1556; 1557; AVX-LABEL: _clearupper16xi8c: 1558; AVX: # BB#0: 1559; AVX-NEXT: vandps {{.*}}(%rip), %xmm0, %xmm0 1560; AVX-NEXT: retq 1561 %r = and <16 x i8> <i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0 1562 ret <16 x i8> %r 1563} 1564 1565define <32 x i8> @_clearupper32xi8c(<32 x i8>) nounwind { 1566; SSE-LABEL: _clearupper32xi8c: 1567; SSE: # BB#0: 1568; SSE-NEXT: movaps {{.*#+}} xmm2 = [15,15,15,15,15,15,15,15,15,15,15,15,15,15,15,15] 1569; SSE-NEXT: andps %xmm2, %xmm0 1570; SSE-NEXT: andps %xmm2, %xmm1 1571; SSE-NEXT: retq 1572; 1573; AVX-LABEL: _clearupper32xi8c: 1574; AVX: # BB#0: 1575; AVX-NEXT: vandps {{.*}}(%rip), %ymm0, %ymm0 1576; AVX-NEXT: retq 1577 %r = and <32 x i8> <i8 15, i8 15, 
i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15, i8 15>, %0 1578 ret <32 x i8> %r 1579} 1580