; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-- -mattr=+sse2 | FileCheck %s --check-prefix=SSE2
; RUN: llc < %s -mtriple=x86_64-- -mattr=+avx2 | FileCheck %s --check-prefix=AVX2

define <8 x i32> @zext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @zext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: zext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i16> %x to <8 x i32>
  %yz = zext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @sext_and_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_and_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_or_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_or_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @sext_xor_v8i32(<8 x i16> %x, <8 x i16> %y) {
; SSE2-LABEL: sext_xor_v8i32:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3]
; SSE2-NEXT:    psrad $16, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm0[4],xmm1[5],xmm0[5],xmm1[6],xmm0[6],xmm1[7],xmm0[7]
; SSE2-NEXT:    psrad $16, %xmm1
; SSE2-NEXT:    movdqa %xmm2, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxwd %xmm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i16> %x to <8 x i32>
  %ys = sext <8 x i16> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i16> @zext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @zext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: zext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3],xmm0[4],xmm2[4],xmm0[5],xmm2[5],xmm0[6],xmm2[6],xmm0[7],xmm2[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1],xmm1[2],xmm2[2],xmm1[3],xmm2[3],xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: zext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpmovzxbw {{.*#+}} xmm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i8> %x to <8 x i16>
  %yz = zext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xz, %yz
  ret <8 x i16> %r
}

define <8 x i16> @sext_and_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_and_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    pand %xmm2, %xmm0
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_and_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = and <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_or_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_or_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    por %xmm2, %xmm0
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_or_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = or <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i16> @sext_xor_v8i16(<8 x i8> %x, <8 x i8> %y) {
; SSE2-LABEL: sext_xor_v8i16:
; SSE2:       # %bb.0:
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1],xmm2[2],xmm0[2],xmm2[3],xmm0[3],xmm2[4],xmm0[4],xmm2[5],xmm0[5],xmm2[6],xmm0[6],xmm2[7],xmm0[7]
; SSE2-NEXT:    punpcklbw {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
; SSE2-NEXT:    pxor %xmm2, %xmm0
; SSE2-NEXT:    psraw $8, %xmm0
; SSE2-NEXT:    retq
;
; AVX2-LABEL: sext_xor_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovsxbw %xmm0, %xmm0
; AVX2-NEXT:    vpmovsxbw %xmm1, %xmm1
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i8> %x to <8 x i16>
  %ys = sext <8 x i8> %y to <8 x i16>
  %r = xor <8 x i16> %xs, %ys
  ret <8 x i16> %r
}

define <8 x i32> @bool_zext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm0, %xmm3
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm4, %xmm4
; SSE2-NEXT:    movdqa %xmm1, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4],xmm4[4],xmm2[5],xmm4[5],xmm2[6],xmm4[6],xmm2[7],xmm4[7]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm4[0],xmm1[1],xmm4[1],xmm1[2],xmm4[2],xmm1[3],xmm4[3]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero
; AVX2-NEXT:    vpand %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    por %xmm0, %xmm1
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_zext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_zext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    pxor %xmm0, %xmm1
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    pxor %xmm2, %xmm2
; SSE2-NEXT:    movdqa %xmm1, %xmm0
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0],xmm2[0],xmm0[1],xmm2[1],xmm0[2],xmm2[2],xmm0[3],xmm2[3]
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm1 = xmm1[4],xmm2[4],xmm1[5],xmm2[5],xmm1[6],xmm2[6],xmm1[7],xmm2[7]
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_zext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    retq
  %xz = zext <8 x i1> %x to <8 x i32>
  %yz = zext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xz, %yz
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_and(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_and:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pand %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pand %xmm1, %xmm0
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_and:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpand %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = and <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_or(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_or:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    por %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    por %xmm1, %xmm0
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_or:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = or <8 x i32> %xs, %ys
  ret <8 x i32> %r
}

define <8 x i32> @bool_sext_xor(<8 x i1> %x, <8 x i1> %y) {
; SSE2-LABEL: bool_sext_xor:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movdqa %xmm1, %xmm3
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm3 = xmm3[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    movdqa %xmm0, %xmm2
; SSE2-NEXT:    punpckhwd {{.*#+}} xmm2 = xmm2[4,4,5,5,6,6,7,7]
; SSE2-NEXT:    pxor %xmm3, %xmm2
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    punpcklwd {{.*#+}} xmm0 = xmm0[0,0,1,1,2,2,3,3]
; SSE2-NEXT:    pxor %xmm1, %xmm0
; SSE2-NEXT:    pslld $31, %xmm0
; SSE2-NEXT:    psrad $31, %xmm0
; SSE2-NEXT:    pslld $31, %xmm2
; SSE2-NEXT:    psrad $31, %xmm2
; SSE2-NEXT:    movdqa %xmm2, %xmm1
; SSE2-NEXT:    retq
;
; AVX2-LABEL: bool_sext_xor:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpxor %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    vpmovzxwd {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; AVX2-NEXT:    vpslld $31, %ymm0, %ymm0
; AVX2-NEXT:    vpsrad $31, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %xs = sext <8 x i1> %x to <8 x i32>
  %ys = sext <8 x i1> %y to <8 x i32>
  %r = xor <8 x i32> %xs, %ys
  ret <8 x i32> %r
}