; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE4
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512f | FileCheck %s --check-prefixes=AVX,AVX512
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512bw | FileCheck %s --check-prefixes=AVX,AVX512

; trunc(concat(x,y)) -> pack

define <8 x i16> @trunc_concat_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; SSE-LABEL: trunc_concat_packssdw_128:
; SSE:       # %bb.0:
; SSE-NEXT:    psrad $17, %xmm0
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    packssdw %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX1-LABEL: trunc_concat_packssdw_128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $17, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_concat_packssdw_128:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $17, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_concat_packssdw_128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $17, %xmm0, %xmm0
; AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
  %2 = and <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = trunc <8 x i32> %3 to <8 x i16>
  ret <8 x i16> %4
}

define <8 x i16> @trunc_concat_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; SSE2-LABEL: trunc_concat_packusdw_128:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $17, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    packssdw %xmm1, %xmm0
; SSE2-NEXT:    retq
;
; SSE4-LABEL: trunc_concat_packusdw_128:
; SSE4:       # %bb.0:
; SSE4-NEXT:    psrld $17, %xmm0
; SSE4-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE4-NEXT:    packusdw %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX1-LABEL: trunc_concat_packusdw_128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $17, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: trunc_concat_packusdw_128:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $17, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: trunc_concat_packusdw_128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrld $17, %xmm0, %xmm0
; AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = lshr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
  %2 = and <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
  %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %4 = trunc <8 x i32> %3 to <8 x i16>
  ret <8 x i16> %4
}

define <16 x i8> @trunc_concat_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwind {
; SSE-LABEL: trunc_concat_packsswb_128:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc_concat_packsswb_128:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = trunc <16 x i16> %3 to <16 x i8>
  ret <16 x i8> %4
}

define <16 x i8> @trunc_concat_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwind {
; SSE-LABEL: trunc_concat_packuswb_128:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: trunc_concat_packuswb_128:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = lshr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = shufflevector <8 x i16> %1, <8 x i16> %2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %4 = trunc <16 x i16> %3 to <16 x i8>
  ret <16 x i8> %4
}

; concat(trunc(x),trunc(y)) -> pack

define <8 x i16> @concat_trunc_packssdw_128(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; SSE2-LABEL: concat_trunc_packssdw_128:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrad $17, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    packssdw %xmm0, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm1
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE4-LABEL: concat_trunc_packssdw_128:
; SSE4:       # %bb.0:
; SSE4-NEXT:    psrad $17, %xmm0
; SSE4-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE4-NEXT:    packssdw %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX1-LABEL: concat_trunc_packssdw_128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrad $17, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: concat_trunc_packssdw_128:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrad $17, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: concat_trunc_packssdw_128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrad $17, %xmm0, %xmm0
; AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = ashr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
  %2 = and <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
  %3 = trunc <4 x i32> %1 to <4 x i16>
  %4 = trunc <4 x i32> %2 to <4 x i16>
  %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %5
}

define <8 x i16> @concat_trunc_packusdw_128(<4 x i32> %a0, <4 x i32> %a1) nounwind {
; SSE2-LABEL: concat_trunc_packusdw_128:
; SSE2:       # %bb.0:
; SSE2-NEXT:    psrld $17, %xmm0
; SSE2-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE2-NEXT:    packssdw %xmm0, %xmm0
; SSE2-NEXT:    packuswb %xmm1, %xmm1
; SSE2-NEXT:    punpcklqdq {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; SSE2-NEXT:    retq
;
; SSE4-LABEL: concat_trunc_packusdw_128:
; SSE4:       # %bb.0:
; SSE4-NEXT:    psrld $17, %xmm0
; SSE4-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE4-NEXT:    packusdw %xmm1, %xmm0
; SSE4-NEXT:    retq
;
; AVX1-LABEL: concat_trunc_packusdw_128:
; AVX1:       # %bb.0:
; AVX1-NEXT:    vpsrld $17, %xmm0, %xmm0
; AVX1-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX1-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: concat_trunc_packusdw_128:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpsrld $17, %xmm0, %xmm0
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
; AVX2-NEXT:    vpand %xmm2, %xmm1, %xmm1
; AVX2-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
;
; AVX512-LABEL: concat_trunc_packusdw_128:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vpsrld $17, %xmm0, %xmm0
; AVX512-NEXT:    vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm1, %xmm1
; AVX512-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    retq
  %1 = lshr <4 x i32> %a0, <i32 17, i32 17, i32 17, i32 17>
  %2 = and <4 x i32> %a1, <i32 15, i32 15, i32 15, i32 15>
  %3 = trunc <4 x i32> %1 to <4 x i16>
  %4 = trunc <4 x i32> %2 to <4 x i16>
  %5 = shufflevector <4 x i16> %3, <4 x i16> %4, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x i16> %5
}

define <16 x i8> @concat_trunc_packsswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwind {
; SSE-LABEL: concat_trunc_packsswb_128:
; SSE:       # %bb.0:
; SSE-NEXT:    psraw $15, %xmm0
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    packsswb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: concat_trunc_packsswb_128:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsraw $15, %xmm0, %xmm0
; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = ashr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = trunc <8 x i16> %1 to <8 x i8>
  %4 = trunc <8 x i16> %2 to <8 x i8>
  %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %5
}

define <16 x i8> @concat_trunc_packuswb_128(<8 x i16> %a0, <8 x i16> %a1) nounwind {
; SSE-LABEL: concat_trunc_packuswb_128:
; SSE:       # %bb.0:
; SSE-NEXT:    psrlw $15, %xmm0
; SSE-NEXT:    pand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1
; SSE-NEXT:    packuswb %xmm1, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: concat_trunc_packuswb_128:
; AVX:       # %bb.0:
; AVX-NEXT:    vpsrlw $15, %xmm0, %xmm0
; AVX-NEXT:    vpand {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm1
; AVX-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %1 = lshr <8 x i16> %a0, <i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15, i16 15>
  %2 = and <8 x i16> %a1, <i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1, i16 1>
  %3 = trunc <8 x i16> %1 to <8 x i8>
  %4 = trunc <8 x i16> %2 to <8 x i8>
  %5 = shufflevector <8 x i8> %3, <8 x i8> %4, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <16 x i8> %5
}