; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse2 | FileCheck %s --check-prefixes=SSE,SSE2
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+sse4.2 | FileCheck %s --check-prefixes=SSE,SSE42
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx2 | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx2,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx2,+fast-variable-perlane-shuffle | FileCheck %s --check-prefixes=AVX,AVX2
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx512f | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx512f,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+avx512f,+fast-variable-perlane-shuffle | FileCheck %s --check-prefix=AVX512
; RUN: llc < %s -mtriple=x86_64-pc-linux -mattr=+xop | FileCheck %s --check-prefixes=AVX,XOP

define void @insert_v7i8_v2i16_2(ptr%a0, ptr%a1) nounwind {
; SSE-LABEL: insert_v7i8_v2i16_2:
; SSE:       # %bb.0:
; SSE-NEXT:    movl (%rsi), %eax
; SSE-NEXT:    movd %eax, %xmm0
; SSE-NEXT:    movq (%rdi), %rcx
; SSE-NEXT:    movq %rcx, %xmm1
; SSE-NEXT:    punpcklwd {{.*#+}} xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; SSE-NEXT:    shrq $48, %rcx
; SSE-NEXT:    movb %cl, 6(%rdi)
; SSE-NEXT:    shrl $16, %eax
; SSE-NEXT:    movw %ax, 4(%rdi)
; SSE-NEXT:    movd %xmm1, (%rdi)
; SSE-NEXT:    retq
;
; AVX-LABEL: insert_v7i8_v2i16_2:
; AVX:       # %bb.0:
; AVX-NEXT:    movl (%rsi), %eax
; AVX-NEXT:    vmovd %eax, %xmm0
; AVX-NEXT:    movq (%rdi), %rcx
; AVX-NEXT:    vmovq %rcx, %xmm1
; AVX-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX-NEXT:    shrq $48, %rcx
; AVX-NEXT:    movb %cl, 6(%rdi)
; AVX-NEXT:    shrl $16, %eax
; AVX-NEXT:    movw %ax, 4(%rdi)
; AVX-NEXT:    vmovd %xmm0, (%rdi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: insert_v7i8_v2i16_2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl (%rsi), %eax
; AVX512-NEXT:    vmovd %eax, %xmm0
; AVX512-NEXT:    movq (%rdi), %rcx
; AVX512-NEXT:    vmovq %rcx, %xmm1
; AVX512-NEXT:    vpunpcklwd {{.*#+}} xmm0 = xmm1[0],xmm0[0],xmm1[1],xmm0[1],xmm1[2],xmm0[2],xmm1[3],xmm0[3]
; AVX512-NEXT:    shrq $48, %rcx
; AVX512-NEXT:    movb %cl, 6(%rdi)
; AVX512-NEXT:    shrl $16, %eax
; AVX512-NEXT:    movw %ax, 4(%rdi)
; AVX512-NEXT:    vmovd %xmm0, (%rdi)
; AVX512-NEXT:    retq
  %1 = load <2 x i16>, ptr%a1
  %2 = bitcast <2 x i16> %1 to <4 x i8>
  %3 = shufflevector <4 x i8> %2, <4 x i8> undef, <7 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef>
  %4 = load <7 x i8>, ptr%a0
  %5 = shufflevector <7 x i8> %4, <7 x i8> %3, <7 x i32> <i32 0, i32 1, i32 7, i32 8, i32 9, i32 10, i32 6>
  store <7 x i8> %5, ptr %a0
  ret void
}

%struct.Mat4 = type { %struct.storage }
%struct.storage = type { [16 x float] }

define void @PR40815(ptr nocapture readonly dereferenceable(64), ptr nocapture dereferenceable(64)) {
; SSE-LABEL: PR40815:
; SSE:       # %bb.0:
; SSE-NEXT:    movaps (%rdi), %xmm0
; SSE-NEXT:    movaps 16(%rdi), %xmm1
; SSE-NEXT:    movaps 32(%rdi), %xmm2
; SSE-NEXT:    movaps 48(%rdi), %xmm3
; SSE-NEXT:    movaps %xmm3, (%rsi)
; SSE-NEXT:    movaps %xmm2, 16(%rsi)
; SSE-NEXT:    movaps %xmm1, 32(%rsi)
; SSE-NEXT:    movaps %xmm0, 48(%rsi)
; SSE-NEXT:    retq
;
; AVX-LABEL: PR40815:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovaps (%rdi), %xmm0
; AVX-NEXT:    vmovaps 16(%rdi), %xmm1
; AVX-NEXT:    vmovaps 32(%rdi), %xmm2
; AVX-NEXT:    vmovaps 48(%rdi), %xmm3
; AVX-NEXT:    vmovaps %xmm2, 16(%rsi)
; AVX-NEXT:    vmovaps %xmm3, (%rsi)
; AVX-NEXT:    vmovaps %xmm0, 48(%rsi)
; AVX-NEXT:    vmovaps %xmm1, 32(%rsi)
; AVX-NEXT:    retq
;
; AVX512-LABEL: PR40815:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovaps 48(%rdi), %xmm0
; AVX512-NEXT:    vmovups 16(%rdi), %ymm1
; AVX512-NEXT:    vinsertf128 $1, (%rdi), %ymm1, %ymm1
; AVX512-NEXT:    vinsertf128 $1, 32(%rdi), %ymm0, %ymm0
; AVX512-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
; AVX512-NEXT:    vmovups %zmm0, (%rsi)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
  %3 = load <16 x float>, ptr %0, align 64
  %4 = shufflevector <16 x float> %3, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %5 = getelementptr inbounds %struct.Mat4, ptr %1, i64 0, i32 0, i32 0, i64 4
  %6 = bitcast <16 x float> %3 to <4 x i128>
  %7 = extractelement <4 x i128> %6, i32 1
  %8 = getelementptr inbounds %struct.Mat4, ptr %1, i64 0, i32 0, i32 0, i64 8
  %9 = bitcast <16 x float> %3 to <4 x i128>
  %10 = extractelement <4 x i128> %9, i32 2
  %11 = getelementptr inbounds %struct.Mat4, ptr %1, i64 0, i32 0, i32 0, i64 12
  %12 = bitcast <16 x float> %3 to <4 x i128>
  %13 = extractelement <4 x i128> %12, i32 3
  store i128 %13, ptr %1, align 16
  store i128 %10, ptr %5, align 16
  store i128 %7, ptr %8, align 16
  store <4 x float> %4, ptr %11, align 16
  ret void
}

define <16 x i32> @PR42819(ptr %a0) {
; SSE-LABEL: PR42819:
; SSE:       # %bb.0:
; SSE-NEXT:    movdqu (%rdi), %xmm3
; SSE-NEXT:    pslldq {{.*#+}} xmm3 = zero,zero,zero,zero,xmm3[0,1,2,3,4,5,6,7,8,9,10,11]
; SSE-NEXT:    xorps %xmm0, %xmm0
; SSE-NEXT:    xorps %xmm1, %xmm1
; SSE-NEXT:    xorps %xmm2, %xmm2
; SSE-NEXT:    retq
;
; AVX-LABEL: PR42819:
; AVX:       # %bb.0:
; AVX-NEXT:    vpermilps {{.*#+}} xmm0 = mem[0,0,1,2]
; AVX-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX-NEXT:    vxorps %xmm1, %xmm1, %xmm1
; AVX-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1,2,3,4],ymm0[5,6,7]
; AVX-NEXT:    vxorps %xmm0, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX512-LABEL: PR42819:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmovdqu (%rdi), %ymm0
; AVX512-NEXT:    movw $-8192, %ax # imm = 0xE000
; AVX512-NEXT:    kmovw %eax, %k1
; AVX512-NEXT:    vpexpandd %zmm0, %zmm0 {%k1} {z}
; AVX512-NEXT:    retq
  %1 = load <8 x i32>, ptr %a0, align 4
  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %3 = shufflevector <16 x i32> zeroinitializer, <16 x i32> %2, <16 x i32> <i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18>
  ret <16 x i32> %3
}

@b = dso_local local_unnamed_addr global i32 0, align 4
@c = dso_local local_unnamed_addr global [49 x i32] zeroinitializer, align 16
@d = dso_local local_unnamed_addr global [49 x i32] zeroinitializer, align 16

define void @PR42833() {
; SSE2-LABEL: PR42833:
; SSE2:       # %bb.0:
; SSE2-NEXT:    movl b(%rip), %eax
; SSE2-NEXT:    movdqa c+144(%rip), %xmm0
; SSE2-NEXT:    movdqa c+128(%rip), %xmm1
; SSE2-NEXT:    addl c+128(%rip), %eax
; SSE2-NEXT:    movd %eax, %xmm2
; SSE2-NEXT:    movd %eax, %xmm3
; SSE2-NEXT:    paddd %xmm1, %xmm3
; SSE2-NEXT:    movdqa d+144(%rip), %xmm4
; SSE2-NEXT:    psubd %xmm0, %xmm4
; SSE2-NEXT:    paddd %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm1, %xmm5
; SSE2-NEXT:    paddd %xmm1, %xmm5
; SSE2-NEXT:    movss {{.*#+}} xmm5 = xmm3[0],xmm5[1,2,3]
; SSE2-NEXT:    movdqa %xmm0, c+144(%rip)
; SSE2-NEXT:    movaps %xmm5, c+128(%rip)
; SSE2-NEXT:    movdqa c+160(%rip), %xmm0
; SSE2-NEXT:    movdqa c+176(%rip), %xmm3
; SSE2-NEXT:    movdqa d+160(%rip), %xmm5
; SSE2-NEXT:    movdqa d+176(%rip), %xmm6
; SSE2-NEXT:    movdqa d+128(%rip), %xmm7
; SSE2-NEXT:    movss {{.*#+}} xmm1 = xmm2[0],xmm1[1,2,3]
; SSE2-NEXT:    psubd %xmm1, %xmm7
; SSE2-NEXT:    psubd %xmm3, %xmm6
; SSE2-NEXT:    psubd %xmm0, %xmm5
; SSE2-NEXT:    movdqa %xmm5, d+160(%rip)
; SSE2-NEXT:    movdqa %xmm6, d+176(%rip)
; SSE2-NEXT:    movdqa %xmm4, d+144(%rip)
; SSE2-NEXT:    movdqa %xmm7, d+128(%rip)
; SSE2-NEXT:    paddd %xmm3, %xmm3
; SSE2-NEXT:    paddd %xmm0, %xmm0
; SSE2-NEXT:    movdqa %xmm0, c+160(%rip)
; SSE2-NEXT:    movdqa %xmm3, c+176(%rip)
; SSE2-NEXT:    retq
;
; SSE42-LABEL: PR42833:
; SSE42:       # %bb.0:
; SSE42-NEXT:    movl b(%rip), %eax
; SSE42-NEXT:    movdqa c+144(%rip), %xmm0
; SSE42-NEXT:    movdqa c+128(%rip), %xmm1
; SSE42-NEXT:    addl c+128(%rip), %eax
; SSE42-NEXT:    movd %eax, %xmm2
; SSE42-NEXT:    paddd %xmm1, %xmm2
; SSE42-NEXT:    movdqa d+144(%rip), %xmm3
; SSE42-NEXT:    psubd %xmm0, %xmm3
; SSE42-NEXT:    paddd %xmm0, %xmm0
; SSE42-NEXT:    movdqa %xmm1, %xmm4
; SSE42-NEXT:    paddd %xmm1, %xmm4
; SSE42-NEXT:    pblendw {{.*#+}} xmm4 = xmm2[0,1],xmm4[2,3,4,5,6,7]
; SSE42-NEXT:    movdqa %xmm0, c+144(%rip)
; SSE42-NEXT:    movdqa %xmm4, c+128(%rip)
; SSE42-NEXT:    movdqa c+160(%rip), %xmm0
; SSE42-NEXT:    movdqa c+176(%rip), %xmm2
; SSE42-NEXT:    movdqa d+160(%rip), %xmm4
; SSE42-NEXT:    movdqa d+176(%rip), %xmm5
; SSE42-NEXT:    movdqa d+128(%rip), %xmm6
; SSE42-NEXT:    pinsrd $0, %eax, %xmm1
; SSE42-NEXT:    psubd %xmm1, %xmm6
; SSE42-NEXT:    psubd %xmm2, %xmm5
; SSE42-NEXT:    psubd %xmm0, %xmm4
; SSE42-NEXT:    movdqa %xmm4, d+160(%rip)
; SSE42-NEXT:    movdqa %xmm5, d+176(%rip)
; SSE42-NEXT:    movdqa %xmm3, d+144(%rip)
; SSE42-NEXT:    movdqa %xmm6, d+128(%rip)
; SSE42-NEXT:    paddd %xmm2, %xmm2
; SSE42-NEXT:    paddd %xmm0, %xmm0
; SSE42-NEXT:    movdqa %xmm0, c+160(%rip)
; SSE42-NEXT:    movdqa %xmm2, c+176(%rip)
; SSE42-NEXT:    retq
;
; AVX1-LABEL: PR42833:
; AVX1:       # %bb.0:
; AVX1-NEXT:    movl b(%rip), %eax
; AVX1-NEXT:    addl c+128(%rip), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    vmovdqa c+128(%rip), %xmm1
; AVX1-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vpaddd %xmm1, %xmm1, %xmm2
; AVX1-NEXT:    vmovdqa c+144(%rip), %xmm3
; AVX1-NEXT:    vpaddd %xmm3, %xmm3, %xmm3
; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
; AVX1-NEXT:    vmovdqa d+144(%rip), %xmm2
; AVX1-NEXT:    vpsubd c+144(%rip), %xmm2, %xmm2
; AVX1-NEXT:    vmovups %ymm0, c+128(%rip)
; AVX1-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm0
; AVX1-NEXT:    vmovdqa d+128(%rip), %xmm1
; AVX1-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; AVX1-NEXT:    vmovdqa d+176(%rip), %xmm1
; AVX1-NEXT:    vmovdqa c+176(%rip), %xmm3
; AVX1-NEXT:    vpsubd %xmm3, %xmm1, %xmm1
; AVX1-NEXT:    vmovdqa d+160(%rip), %xmm4
; AVX1-NEXT:    vmovdqa c+160(%rip), %xmm5
; AVX1-NEXT:    vpsubd %xmm5, %xmm4, %xmm4
; AVX1-NEXT:    vmovdqa %xmm2, d+144(%rip)
; AVX1-NEXT:    vmovdqa %xmm4, d+160(%rip)
; AVX1-NEXT:    vmovdqa %xmm1, d+176(%rip)
; AVX1-NEXT:    vmovdqa %xmm0, d+128(%rip)
; AVX1-NEXT:    vpaddd %xmm3, %xmm3, %xmm0
; AVX1-NEXT:    vpaddd %xmm5, %xmm5, %xmm1
; AVX1-NEXT:    vmovdqa %xmm1, c+160(%rip)
; AVX1-NEXT:    vmovdqa %xmm0, c+176(%rip)
; AVX1-NEXT:    vzeroupper
; AVX1-NEXT:    retq
;
; AVX2-LABEL: PR42833:
; AVX2:       # %bb.0:
; AVX2-NEXT:    movl b(%rip), %eax
; AVX2-NEXT:    vmovdqu c+128(%rip), %ymm0
; AVX2-NEXT:    addl c+128(%rip), %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm2
; AVX2-NEXT:    vpaddd %ymm0, %ymm0, %ymm3
; AVX2-NEXT:    vpblendd {{.*#+}} ymm2 = ymm2[0],ymm3[1,2,3,4,5,6,7]
; AVX2-NEXT:    vmovdqu %ymm2, c+128(%rip)
; AVX2-NEXT:    vmovdqu c+160(%rip), %ymm2
; AVX2-NEXT:    vmovdqu d+160(%rip), %ymm3
; AVX2-NEXT:    vmovdqu d+128(%rip), %ymm4
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
; AVX2-NEXT:    vpsubd %ymm0, %ymm4, %ymm0
; AVX2-NEXT:    vpsubd %ymm2, %ymm3, %ymm1
; AVX2-NEXT:    vmovdqu %ymm1, d+160(%rip)
; AVX2-NEXT:    vmovdqu %ymm0, d+128(%rip)
; AVX2-NEXT:    vpaddd %ymm2, %ymm2, %ymm0
; AVX2-NEXT:    vmovdqu %ymm0, c+160(%rip)
; AVX2-NEXT:    vzeroupper
; AVX2-NEXT:    retq
;
; AVX512-LABEL: PR42833:
; AVX512:       # %bb.0:
; AVX512-NEXT:    movl b(%rip), %eax
; AVX512-NEXT:    vmovdqu c+128(%rip), %ymm0
; AVX512-NEXT:    vmovdqu64 c+128(%rip), %zmm1
; AVX512-NEXT:    addl c+128(%rip), %eax
; AVX512-NEXT:    vmovd %eax, %xmm2
; AVX512-NEXT:    vpaddd %ymm2, %ymm0, %ymm2
; AVX512-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vpblendd {{.*#+}} ymm0 = ymm2[0],ymm0[1,2,3,4,5,6,7]
; AVX512-NEXT:    vmovdqa c+128(%rip), %xmm2
; AVX512-NEXT:    vmovdqu %ymm0, c+128(%rip)
; AVX512-NEXT:    vmovdqu c+160(%rip), %ymm0
; AVX512-NEXT:    vmovdqu64 d+128(%rip), %zmm3
; AVX512-NEXT:    vpinsrd $0, %eax, %xmm2, %xmm2
; AVX512-NEXT:    vinserti32x4 $0, %xmm2, %zmm1, %zmm1
; AVX512-NEXT:    vinserti64x4 $1, %ymm0, %zmm1, %zmm1
; AVX512-NEXT:    vpsubd %zmm1, %zmm3, %zmm1
; AVX512-NEXT:    vmovdqu64 %zmm1, d+128(%rip)
; AVX512-NEXT:    vpaddd %ymm0, %ymm0, %ymm0
; AVX512-NEXT:    vmovdqu %ymm0, c+160(%rip)
; AVX512-NEXT:    vzeroupper
; AVX512-NEXT:    retq
;
; XOP-LABEL: PR42833:
; XOP:       # %bb.0:
; XOP-NEXT:    movl b(%rip), %eax
; XOP-NEXT:    addl c+128(%rip), %eax
; XOP-NEXT:    vmovd %eax, %xmm0
; XOP-NEXT:    vmovdqa c+128(%rip), %xmm1
; XOP-NEXT:    vpaddd %xmm0, %xmm1, %xmm0
; XOP-NEXT:    vpaddd %xmm1, %xmm1, %xmm2
; XOP-NEXT:    vmovdqa c+144(%rip), %xmm3
; XOP-NEXT:    vpaddd %xmm3, %xmm3, %xmm3
; XOP-NEXT:    vinsertf128 $1, %xmm3, %ymm2, %ymm2
; XOP-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm2[1,2,3,4,5,6,7]
; XOP-NEXT:    vmovdqa d+144(%rip), %xmm2
; XOP-NEXT:    vpsubd c+144(%rip), %xmm2, %xmm2
; XOP-NEXT:    vmovups %ymm0, c+128(%rip)
; XOP-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm0
; XOP-NEXT:    vmovdqa d+128(%rip), %xmm1
; XOP-NEXT:    vpsubd %xmm0, %xmm1, %xmm0
; XOP-NEXT:    vmovdqa d+176(%rip), %xmm1
; XOP-NEXT:    vmovdqa c+176(%rip), %xmm3
; XOP-NEXT:    vpsubd %xmm3, %xmm1, %xmm1
; XOP-NEXT:    vmovdqa d+160(%rip), %xmm4
; XOP-NEXT:    vmovdqa c+160(%rip), %xmm5
; XOP-NEXT:    vpsubd %xmm5, %xmm4, %xmm4
; XOP-NEXT:    vmovdqa %xmm2, d+144(%rip)
; XOP-NEXT:    vmovdqa %xmm4, d+160(%rip)
; XOP-NEXT:    vmovdqa %xmm1, d+176(%rip)
; XOP-NEXT:    vmovdqa %xmm0, d+128(%rip)
; XOP-NEXT:    vpaddd %xmm3, %xmm3, %xmm0
; XOP-NEXT:    vpaddd %xmm5, %xmm5, %xmm1
; XOP-NEXT:    vmovdqa %xmm1, c+160(%rip)
; XOP-NEXT:    vmovdqa %xmm0, c+176(%rip)
; XOP-NEXT:    vzeroupper
; XOP-NEXT:    retq
  %1 = load i32, ptr @b, align 4
  %2 = load <8 x i32>, ptr getelementptr inbounds ([49 x i32], ptr @c, i64 0, i64 32), align 16
  %3 = shufflevector <8 x i32> %2, <8 x i32> undef, <16 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %4 = extractelement <8 x i32> %2, i32 0
  %5 = add i32 %1, %4
  %6 = insertelement <8 x i32> <i32 undef, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>, i32 %5, i32 0
  %7 = add <8 x i32> %2, %6
  %8 = shl <8 x i32> %2, %6
  %9 = shufflevector <8 x i32> %7, <8 x i32> %8, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  store <8 x i32> %9, ptr getelementptr inbounds ([49 x i32], ptr @c, i64 0, i64 32), align 16
  %10 = load <8 x i32>, ptr getelementptr inbounds ([49 x i32], ptr @c, i64 0, i64 40), align 16
  %11 = shufflevector <8 x i32> %10, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %12 = load <16 x i32>, ptr getelementptr inbounds ([49 x i32], ptr @d, i64 0, i64 32), align 16
  %13 = insertelement <16 x i32> %3, i32 %5, i32 0
  %14 = shufflevector <16 x i32> %13, <16 x i32> %11, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
  %15 = sub <16 x i32> %12, %14
  store <16 x i32> %15, ptr getelementptr inbounds ([49 x i32], ptr @d, i64 0, i64 32), align 16
  %16 = shl <8 x i32> %10, <i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1, i32 1>
  store <8 x i32> %16, ptr getelementptr inbounds ([49 x i32], ptr @c, i64 0, i64 40), align 16
  ret void
}