; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefix=SSE2-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+ssse3 | FileCheck %s --check-prefix=SSE2-SSSE3
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX12,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX12,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512

;
; 128-bit vectors
;

define i1 @bitcast_v2i64_to_v2i1(<2 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v2i64_to_v2i1:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskpd %xmm0, %ecx
; SSE2-SSSE3-NEXT:    movl %ecx, %eax
; SSE2-SSSE3-NEXT:    shrb %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v2i64_to_v2i1:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vmovmskpd %xmm0, %ecx
; AVX12-NEXT:    movl %ecx, %eax
; AVX12-NEXT:    shrb %al
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v2i64_to_v2i1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %xmm0, %xmm1, %k0
; AVX512-NEXT:    kshiftrw $1, %k0, %k1
; AVX512-NEXT:    kmovd %k1, %ecx
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <2 x i64> %a0, zeroinitializer
  %2 = bitcast <2 x i1> %1 to <2 x i1>
  %3 = extractelement <2 x i1> %2, i32 0
  %4 = extractelement <2 x i1> %2, i32 1
  %5 = add i1 %3, %4
  ret i1 %5
}

define i2 @bitcast_v4i32_to_v2i2(<4 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v4i32_to_v2i2:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrb $2, %cl
; SSE2-SSSE3-NEXT:    andb $3, %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX-LABEL: bitcast_v4i32_to_v2i2:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskps %xmm0, %eax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    shrb $2, %cl
; AVX-NEXT:    andb $3, %al
; AVX-NEXT:    addb %cl, %al
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
  %1 = icmp slt <4 x i32> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}

define i4 @bitcast_v8i16_to_v2i4(<8 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i16_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrb $4, %cl
; SSE2-SSSE3-NEXT:    andb $15, %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX12-LABEL: bitcast_v8i16_to_v2i4:
; AVX12:       # %bb.0:
; AVX12-NEXT:    vpacksswb %xmm0, %xmm0, %xmm0
; AVX12-NEXT:    vpmovmskb %xmm0, %eax
; AVX12-NEXT:    movl %eax, %ecx
; AVX12-NEXT:    shrb $4, %cl
; AVX12-NEXT:    andb $15, %al
; AVX12-NEXT:    addb %cl, %al
; AVX12-NEXT:    # kill: def $al killed $al killed $eax
; AVX12-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i16_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %xmm0, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movl %eax, %ecx
; AVX512-NEXT:    shrb $4, %cl
; AVX512-NEXT:    andb $15, %al
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i16> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

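; The 16-bit pmovmskb mask holds the whole <16 x i1> value; SSE2 bounces it through the stack to split the two i8 halves, AVX just shifts.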
define i8 @bitcast_v16i8_to_v2i8(<16 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v16i8_to_v2i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX-LABEL: bitcast_v16i8_to_v2i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpmovmskb %xmm0, %ecx
; AVX-NEXT:    movl %ecx, %eax
; AVX-NEXT:    shrl $8, %eax
; AVX-NEXT:    addb %cl, %al
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    retq
  %1 = icmp slt <16 x i8> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

;
; 256-bit vectors
;

define i2 @bitcast_v4i64_to_v2i2(<4 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v4i64_to_v2i2:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    movmskps %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrb $2, %cl
; SSE2-SSSE3-NEXT:    andb $3, %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX-LABEL: bitcast_v4i64_to_v2i2:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskpd %ymm0, %eax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    shrb $2, %cl
; AVX-NEXT:    andb $3, %al
; AVX-NEXT:    addb %cl, %al
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = icmp slt <4 x i64> %a0, zeroinitializer
  %2 = bitcast <4 x i1> %1 to <2 x i2>
  %3 = extractelement <2 x i2> %2, i32 0
  %4 = extractelement <2 x i2> %2, i32 1
  %5 = add i2 %3, %4
  ret i2 %5
}

define i4 @bitcast_v8i32_to_v2i4(<8 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i32_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrb $4, %cl
; SSE2-SSSE3-NEXT:    andb $15, %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX-LABEL: bitcast_v8i32_to_v2i4:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovmskps %ymm0, %eax
; AVX-NEXT:    movl %eax, %ecx
; AVX-NEXT:    shrb $4, %cl
; AVX-NEXT:    andb $15, %al
; AVX-NEXT:    addb %cl, %al
; AVX-NEXT:    # kill: def $al killed $al killed $eax
; AVX-NEXT:    vzeroupper
; AVX-NEXT:    retq
  %1 = icmp slt <8 x i32> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

define i8 @bitcast_v16i16_to_v2i8(<16 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v16i16_to_v2i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v16i16_to_v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    movl %ecx, %eax
; AVX1-NEXT:    shrl $8, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v16i16_to_v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $8, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i16_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %ymm0, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT:    vmovd %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i16> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

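; SSE2/AVX1 add the two 16-bit pmovmskb results directly; AVX2/AVX512 take one 32-bit ymm vpmovmskb mask and add its i16 halves.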
define i16 @bitcast_v32i8_to_v2i16(<32 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i8_to_v2i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v32i8_to_v2i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v32i8_to_v2i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $16, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v32i8_to_v2i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovmskb %ymm0, %ecx
; AVX512-NEXT:    movl %ecx, %eax
; AVX512-NEXT:    shrl $16, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <32 x i8> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}

;
; 512-bit vectors
;

define i4 @bitcast_v8i64_to_v2i4(<8 x i64> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v8i64_to_v2i4:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packssdw %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm0, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movl %eax, %ecx
; SSE2-SSSE3-NEXT:    shrb $4, %cl
; SSE2-SSSE3-NEXT:    andb $15, %al
; SSE2-SSSE3-NEXT:    addb %cl, %al
; SSE2-SSSE3-NEXT:    # kill: def $al killed $al killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v8i64_to_v2i4:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpxor %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vpcmpgtq %xmm2, %xmm3, %xmm2
; AVX1-NEXT:    vpcmpgtq %xmm0, %xmm3, %xmm0
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vmovmskps %ymm0, %eax
; AVX1-NEXT:    movl %eax, %ecx
; AVX1-NEXT:    shrb $4, %cl
; AVX1-NEXT:    andb $15, %al
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v8i64_to_v2i4:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vmovmskps %ymm0, %eax
; AVX2-NEXT:    movl %eax, %ecx
; AVX2-NEXT:    shrb $4, %cl
; AVX2-NEXT:    andb $15, %al
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v8i64_to_v2i4:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtq %zmm0, %zmm1, %k0
; AVX512-NEXT:    kmovd %k0, %eax
; AVX512-NEXT:    movl %eax, %ecx
; AVX512-NEXT:    shrb $4, %cl
; AVX512-NEXT:    andb $15, %al
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <8 x i64> %a0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to <2 x i4>
  %3 = extractelement <2 x i4> %2, i32 0
  %4 = extractelement <2 x i4> %2, i32 1
  %5 = add i4 %3, %4
  ret i4 %5
}

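; AVX512 keeps the <16 x i32> compare mask in k0 and spills it with kmovw to pick out the two i8 halves; the other targets pack down to a 16-bit pmovmskb mask.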
define i8 @bitcast_v16i32_to_v2i8(<16 x i32> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v16i32_to_v2i8:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packssdw %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    packssdw %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    packsswb %xmm2, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    movd %eax, %xmm0
; SSE2-SSSE3-NEXT:    movdqa %xmm0, -{{[0-9]+}}(%rsp)
; SSE2-SSSE3-NEXT:    movzbl -{{[0-9]+}}(%rsp), %eax
; SSE2-SSSE3-NEXT:    addb -{{[0-9]+}}(%rsp), %al
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v16i32_to_v2i8:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vpackssdw %xmm2, %xmm0, %xmm0
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %ecx
; AVX1-NEXT:    movl %ecx, %eax
; AVX1-NEXT:    shrl $8, %eax
; AVX1-NEXT:    addb %cl, %al
; AVX1-NEXT:    # kill: def $al killed $al killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v16i32_to_v2i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm2, %xmm2, %xmm2
; AVX2-NEXT:    vpcmpgtd %ymm1, %ymm2, %ymm1
; AVX2-NEXT:    vpcmpgtd %ymm0, %ymm2, %ymm0
; AVX2-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm1
; AVX2-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %xmm0, %ecx
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $8, %eax
; AVX2-NEXT:    addb %cl, %al
; AVX2-NEXT:    # kill: def $al killed $al killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v16i32_to_v2i8:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpxor %xmm1, %xmm1, %xmm1
; AVX512-NEXT:    vpcmpgtd %zmm0, %zmm1, %k0
; AVX512-NEXT:    kmovw %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT:    vmovd %xmm0, %ecx
; AVX512-NEXT:    vpextrb $1, %xmm0, %eax
; AVX512-NEXT:    addb %cl, %al
; AVX512-NEXT:    # kill: def $al killed $al killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <16 x i32> %a0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to <2 x i8>
  %3 = extractelement <2 x i8> %2, i32 0
  %4 = extractelement <2 x i8> %2, i32 1
  %5 = add i8 %3, %4
  ret i8 %5
}

define i16 @bitcast_v32i16_to_v2i16(<32 x i16> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v32i16_to_v2i16:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    packsswb %xmm3, %xmm2
; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %ecx
; SSE2-SSSE3-NEXT:    packsswb %xmm1, %xmm0
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    # kill: def $ax killed $ax killed $eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v32i16_to_v2i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
; AVX1-NEXT:    vpacksswb %xmm2, %xmm1, %xmm1
; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v32i16_to_v2i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,2,1,3]
; AVX2-NEXT:    vpmovmskb %ymm0, %ecx
; AVX2-NEXT:    movl %ecx, %eax
; AVX2-NEXT:    shrl $16, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v32i16_to_v2i16:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovw2m %zmm0, %k0
; AVX512-NEXT:    kmovd %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    vmovdqa -{{[0-9]+}}(%rsp), %xmm0
; AVX512-NEXT:    vmovd %xmm0, %ecx
; AVX512-NEXT:    vpextrw $1, %xmm0, %eax
; AVX512-NEXT:    addl %ecx, %eax
; AVX512-NEXT:    # kill: def $ax killed $ax killed $eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <32 x i16> %a0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to <2 x i16>
  %3 = extractelement <2 x i16> %2, i32 0
  %4 = extractelement <2 x i16> %2, i32 1
  %5 = add i16 %3, %4
  ret i16 %5
}

define i32 @bitcast_v64i8_to_v2i32(<64 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v64i8_to_v2i32:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-SSSE3-NEXT:    shll $16, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %eax
; SSE2-SSSE3-NEXT:    pmovmskb %xmm3, %edx
; SSE2-SSSE3-NEXT:    shll $16, %edx
; SSE2-SSSE3-NEXT:    orl %eax, %edx
; SSE2-SSSE3-NEXT:    shlq $32, %rdx
; SSE2-SSSE3-NEXT:    orq %rcx, %rdx
; SSE2-SSSE3-NEXT:    movq %rdx, %xmm0
; SSE2-SSSE3-NEXT:    pshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
; SSE2-SSSE3-NEXT:    movd %xmm0, %eax
; SSE2-SSSE3-NEXT:    addl %ecx, %eax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v64i8_to_v2i32:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovmskb %xmm1, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm1
; AVX1-NEXT:    vpmovmskb %xmm1, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    vpmovmskb %xmm0, %edx
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %edx, %eax
; AVX1-NEXT:    addl %ecx, %eax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v64i8_to_v2i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm1, %ecx
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    addl %ecx, %eax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v64i8_to_v2i32:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovb2m %zmm0, %k0
; AVX512-NEXT:    kmovq %k0, -{{[0-9]+}}(%rsp)
; AVX512-NEXT:    movl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    addl -{{[0-9]+}}(%rsp), %eax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <64 x i8> %a0, zeroinitializer
  %2 = bitcast <64 x i1> %1 to <2 x i32>
  %3 = extractelement <2 x i32> %2, i32 0
  %4 = extractelement <2 x i32> %2, i32 1
  %5 = add i32 %3, %4
  ret i32 %5
}

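; A <128 x i1> mask no longer fits in one GPR: each i64 half is assembled from pmovmskb/vpmovmskb results (or read with kmovq on AVX512) and the halves are added.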
define i64 @bitcast_v128i8_to_v2i64(<128 x i8> %a0) nounwind {
; SSE2-SSSE3-LABEL: bitcast_v128i8_to_v2i64:
; SSE2-SSSE3:       # %bb.0:
; SSE2-SSSE3-NEXT:    pmovmskb %xmm4, %eax
; SSE2-SSSE3-NEXT:    pmovmskb %xmm5, %ecx
; SSE2-SSSE3-NEXT:    shll $16, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm6, %eax
; SSE2-SSSE3-NEXT:    pmovmskb %xmm7, %edx
; SSE2-SSSE3-NEXT:    shll $16, %edx
; SSE2-SSSE3-NEXT:    orl %eax, %edx
; SSE2-SSSE3-NEXT:    shlq $32, %rdx
; SSE2-SSSE3-NEXT:    orq %rcx, %rdx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm0, %eax
; SSE2-SSSE3-NEXT:    pmovmskb %xmm1, %ecx
; SSE2-SSSE3-NEXT:    shll $16, %ecx
; SSE2-SSSE3-NEXT:    orl %eax, %ecx
; SSE2-SSSE3-NEXT:    pmovmskb %xmm2, %esi
; SSE2-SSSE3-NEXT:    pmovmskb %xmm3, %eax
; SSE2-SSSE3-NEXT:    shll $16, %eax
; SSE2-SSSE3-NEXT:    orl %esi, %eax
; SSE2-SSSE3-NEXT:    shlq $32, %rax
; SSE2-SSSE3-NEXT:    orq %rcx, %rax
; SSE2-SSSE3-NEXT:    addq %rdx, %rax
; SSE2-SSSE3-NEXT:    retq
;
; AVX1-LABEL: bitcast_v128i8_to_v2i64:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpmovmskb %xmm2, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm2, %xmm2
; AVX1-NEXT:    vpmovmskb %xmm2, %edx
; AVX1-NEXT:    shll $16, %edx
; AVX1-NEXT:    orl %eax, %edx
; AVX1-NEXT:    vpmovmskb %xmm3, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm2
; AVX1-NEXT:    vpmovmskb %xmm2, %ecx
; AVX1-NEXT:    shll $16, %ecx
; AVX1-NEXT:    orl %eax, %ecx
; AVX1-NEXT:    shlq $32, %rcx
; AVX1-NEXT:    orq %rdx, %rcx
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %edx
; AVX1-NEXT:    shll $16, %edx
; AVX1-NEXT:    orl %eax, %edx
; AVX1-NEXT:    vpmovmskb %xmm1, %esi
; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm0
; AVX1-NEXT:    vpmovmskb %xmm0, %eax
; AVX1-NEXT:    shll $16, %eax
; AVX1-NEXT:    orl %esi, %eax
; AVX1-NEXT:    shlq $32, %rax
; AVX1-NEXT:    orq %rdx, %rax
; AVX1-NEXT:    addq %rcx, %rax
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v128i8_to_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovmskb %ymm3, %eax
; AVX2-NEXT:    shlq $32, %rax
; AVX2-NEXT:    vpmovmskb %ymm2, %ecx
; AVX2-NEXT:    orq %rax, %rcx
; AVX2-NEXT:    vpmovmskb %ymm1, %edx
; AVX2-NEXT:    shlq $32, %rdx
; AVX2-NEXT:    vpmovmskb %ymm0, %eax
; AVX2-NEXT:    orq %rdx, %rax
; AVX2-NEXT:    addq %rcx, %rax
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: bitcast_v128i8_to_v2i64:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpmovb2m %zmm1, %k0
; AVX512-NEXT:    kmovq %k0, %rcx
; AVX512-NEXT:    vpmovb2m %zmm0, %k0
; AVX512-NEXT:    kmovq %k0, %rax
; AVX512-NEXT:    addq %rcx, %rax
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %1 = icmp slt <128 x i8> %a0, zeroinitializer
  %2 = bitcast <128 x i1> %1 to <2 x i64>
  %3 = extractelement <2 x i64> %2, i32 0
  %4 = extractelement <2 x i64> %2, i32 1
  %5 = add i64 %3, %4
  ret i64 %5
}