; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=arm-eabi -mattr=+neon %s -o - | FileCheck %s

; Tests that interleaving shufflevector patterns are matched to the NEON
; VZIP instruction, for both D-register (64-bit) and Q-register (128-bit)
; operands, with integer and float element types, including masks that
; contain undef lanes.

; Both zipped halves are consumed (via the add), so VZIP's in-place update
; of both registers is used directly.
define <8 x i8> @vzipi8(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vzip.8 d17, d16
; CHECK-NEXT:    vadd.i8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

; A single <16 x i8>-wide zip of two D-registers: the two VZIP outputs form
; the low and high halves of the Q-register result.
define <16 x i8> @vzipi8_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_Qres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vzip.8 d16, d17
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i8> %tmp3
}

define <4 x i16> @vzipi16(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vzipi16:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vzip.16 d17, d16
; CHECK-NEXT:    vadd.i16 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = add <4 x i16> %tmp3, %tmp4
  ret <4 x i16> %tmp5
}

define <8 x i16> @vzipi16_Qres(<4 x i16>* %A, <4 x i16>* %B) nounwind {
; CHECK-LABEL: vzipi16_Qres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vzip.16 d16, d17
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %tmp3 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i16> %tmp3
}

; VZIP.32 is equivalent to VTRN.32 for 64-bit vectors.

define <16 x i8> @vzipQi8(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vzip.8 q9, q8
; CHECK-NEXT:    vadd.i8 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

; The <32 x i8> result does not fit in registers; the CHECK lines show it
; being stored through r0 in two 128-bit pieces (presumably the hidden
; sret pointer of the ARM AAPCS — confirm against the ABI if relied upon).
define <32 x i8> @vzipQi8_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vzip.8 q9, q8
; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 1, i32 17, i32 2, i32 18, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
  ret <32 x i8> %tmp3
}

define <8 x i16> @vzipQi16(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vzipQi16:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vzip.16 q9, q8
; CHECK-NEXT:    vadd.i16 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  %tmp5 = add <8 x i16> %tmp3, %tmp4
  ret <8 x i16> %tmp5
}

define <16 x i16> @vzipQi16_QQres(<8 x i16>* %A, <8 x i16>* %B) nounwind {
; CHECK-LABEL: vzipQi16_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vzip.16 q9, q8
; CHECK-NEXT:    vst1.16 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %tmp2 = load <8 x i16>, <8 x i16>* %B
  %tmp3 = shufflevector <8 x i16> %tmp1, <8 x i16> %tmp2, <16 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 14, i32 7, i32 15>
  ret <16 x i16> %tmp3
}

define <4 x i32> @vzipQi32(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vzipQi32:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vzip.32 q9, q8
; CHECK-NEXT:    vadd.i32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = add <4 x i32> %tmp3, %tmp4
  ret <4 x i32> %tmp5
}

define <8 x i32> @vzipQi32_QQres(<4 x i32>* %A, <4 x i32>* %B) nounwind {
; CHECK-LABEL: vzipQi32_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vzip.32 q9, q8
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x i32>, <4 x i32>* %A
  %tmp2 = load <4 x i32>, <4 x i32>* %B
  %tmp3 = shufflevector <4 x i32> %tmp1, <4 x i32> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i32> %tmp3
}

; Same lowering for float elements: vzip.32 operates on the bit pattern,
; only the add becomes vadd.f32.
define <4 x float> @vzipQf(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vzipQf:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vzip.32 q9, q8
; CHECK-NEXT:    vadd.f32 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  %tmp4 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
  %tmp5 = fadd <4 x float> %tmp3, %tmp4
  ret <4 x float> %tmp5
}

define <8 x float> @vzipQf_QQres(<4 x float>* %A, <4 x float>* %B) nounwind {
; CHECK-LABEL: vzipQf_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vzip.32 q9, q8
; CHECK-NEXT:    vst1.32 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <4 x float>, <4 x float>* %A
  %tmp2 = load <4 x float>, <4 x float>* %B
  %tmp3 = shufflevector <4 x float> %tmp1, <4 x float> %tmp2, <8 x i32> <i32 0, i32 4, i32 1, i32 5, i32 2, i32 6, i32 3, i32 7>
  ret <8 x float> %tmp3
}

; Undef shuffle indices should not prevent matching to VZIP:

define <8 x i8> @vzipi8_undef(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_undef:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d16, [r1]
; CHECK-NEXT:    vldr d17, [r0]
; CHECK-NEXT:    vzip.8 d17, d16
; CHECK-NEXT:    vadd.i8 d16, d17, d16
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11>
  %tmp4 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <8 x i32> <i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
  %tmp5 = add <8 x i8> %tmp3, %tmp4
  ret <8 x i8> %tmp5
}

define <16 x i8> @vzipi8_undef_Qres(<8 x i8>* %A, <8 x i8>* %B) nounwind {
; CHECK-LABEL: vzipi8_undef_Qres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vzip.8 d16, d17
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <8 x i8>, <8 x i8>* %A
  %tmp2 = load <8 x i8>, <8 x i8>* %B
  %tmp3 = shufflevector <8 x i8> %tmp1, <8 x i8> %tmp2, <16 x i32> <i32 0, i32 undef, i32 1, i32 9, i32 undef, i32 10, i32 3, i32 11, i32 4, i32 12, i32 5, i32 13, i32 6, i32 undef, i32 undef, i32 15>
  ret <16 x i8> %tmp3
}

define <16 x i8> @vzipQi8_undef(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8_undef:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r1]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r0]
; CHECK-NEXT:    vzip.8 q9, q8
; CHECK-NEXT:    vadd.i8 q8, q9, q8
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23>
  %tmp4 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <16 x i32> <i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
  %tmp5 = add <16 x i8> %tmp3, %tmp4
  ret <16 x i8> %tmp5
}

define <32 x i8> @vzipQi8_undef_QQres(<16 x i8>* %A, <16 x i8>* %B) nounwind {
; CHECK-LABEL: vzipQi8_undef_QQres:
; CHECK:       @ BB#0:
; CHECK-NEXT:    vld1.64 {d16, d17}, [r2]
; CHECK-NEXT:    vld1.64 {d18, d19}, [r1]
; CHECK-NEXT:    vzip.8 q9, q8
; CHECK-NEXT:    vst1.8 {d18, d19}, [r0:128]!
; CHECK-NEXT:    vst1.64 {d16, d17}, [r0:128]
; CHECK-NEXT:    mov pc, lr
  %tmp1 = load <16 x i8>, <16 x i8>* %A
  %tmp2 = load <16 x i8>, <16 x i8>* %B
  %tmp3 = shufflevector <16 x i8> %tmp1, <16 x i8> %tmp2, <32 x i32> <i32 0, i32 16, i32 1, i32 undef, i32 undef, i32 undef, i32 3, i32 19, i32 4, i32 20, i32 5, i32 21, i32 6, i32 22, i32 7, i32 23, i32 8, i32 24, i32 9, i32 undef, i32 10, i32 26, i32 11, i32 27, i32 12, i32 28, i32 13, i32 undef, i32 14, i32 30, i32 undef, i32 31>
  ret <32 x i8> %tmp3
}

; A fully-undef low half of the mask still allows a single VZIP.
define <8 x i16> @vzip_lower_shufflemask_undef(<4 x i16>* %A, <4 x i16>* %B) {
; CHECK-LABEL: vzip_lower_shufflemask_undef:
; CHECK:       @ BB#0: @ %entry
; CHECK-NEXT:    vldr d17, [r1]
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vzip.16 d16, d17
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
entry:
  %tmp1 = load <4 x i16>, <4 x i16>* %A
  %tmp2 = load <4 x i16>, <4 x i16>* %B
  %0 = shufflevector <4 x i16> %tmp1, <4 x i16> %tmp2, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 2, i32 6, i32 3, i32 7>
  ret <8 x i16> %0
}

; Mask <0,0,1,0> on a single source: lowered as a vdup of lane 0 plus vzip.
define <4 x i32> @vzip_lower_shufflemask_zeroed(<2 x i32>* %A) {
; CHECK-LABEL: vzip_lower_shufflemask_zeroed:
; CHECK:       @ BB#0: @ %entry
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vdup.32 q9, d16[0]
; CHECK-NEXT:    vzip.32 q8, q9
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
entry:
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 0, i32 1, i32 0>
  ret <4 x i32> %0
}

define <4 x i32> @vzip_lower_shufflemask_vuzp(<2 x i32>* %A) {
; CHECK-LABEL: vzip_lower_shufflemask_vuzp:
; CHECK:       @ BB#0: @ %entry
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vdup.32 q9, d16[0]
; CHECK-NEXT:    vzip.32 q8, q9
; CHECK-NEXT:    vmov r0, r1, d16
; CHECK-NEXT:    vmov r2, r3, d17
; CHECK-NEXT:    mov pc, lr
entry:
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %0 = shufflevector <2 x i32> %tmp1, <2 x i32> %tmp1, <4 x i32> <i32 0, i32 2, i32 1, i32 0>
  ret <4 x i32> %0
}

; Mask <1,1,0,0>: lowered as vzip followed by a vext to rotate the halves.
define void @vzip_undef_rev_shufflemask_vtrn(<2 x i32>* %A, <4 x i32>* %B) {
; CHECK-LABEL: vzip_undef_rev_shufflemask_vtrn:
; CHECK:       @ BB#0: @ %entry
; CHECK-NEXT:    vldr d16, [r0]
; CHECK-NEXT:    vorr q9, q8, q8
; CHECK-NEXT:    vzip.32 q8, q9
; CHECK-NEXT:    vext.32 q8, q8, q8, #2
; CHECK-NEXT:    vst1.64 {d16, d17}, [r1]
; CHECK-NEXT:    mov pc, lr
entry:
  %tmp1 = load <2 x i32>, <2 x i32>* %A
  %0 = shufflevector <2 x i32> %tmp1, <2 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 0>
  store <4 x i32> %0, <4 x i32>* %B
  ret void
}

; Mask <4,4,5,3> is not a zip; it is lowered with a chain of vext instructions.
define void @vzip_vext_factor(<8 x i16>* %A, <4 x i16>* %B) {
; CHECK-LABEL: vzip_vext_factor:
; CHECK:       @ BB#0: @ %entry
; CHECK-NEXT:    vld1.64 {d16, d17}, [r0]
; CHECK-NEXT:    vext.16 d18, d16, d17, #1
; CHECK-NEXT:    vext.16 d16, d18, d17, #2
; CHECK-NEXT:    vext.16 d16, d16, d16, #1
; CHECK-NEXT:    vstr d16, [r1]
; CHECK-NEXT:    mov pc, lr
entry:
  %tmp1 = load <8 x i16>, <8 x i16>* %A
  %0 = shufflevector <8 x i16> %tmp1, <8 x i16> undef, <4 x i32> <i32 4, i32 4, i32 5, i32 3>
  store <4 x i16> %0, <4 x i16>* %B
  ret void
}

; Zip of two splatted scalars: each scalar is loaded with the all-lanes
; form vld1.8 {dN[]}, then a single vzip.8 interleaves them.
define <8 x i8> @vdup_zip(i8* nocapture readonly %x, i8* nocapture readonly %y) {
; CHECK-LABEL: vdup_zip:
; CHECK:       @ BB#0: @ %entry
; CHECK-NEXT:    vld1.8 {d16[]}, [r1]
; CHECK-NEXT:    vld1.8 {d17[]}, [r0]
; CHECK-NEXT:    vzip.8 d17, d16
; CHECK-NEXT:    vmov r0, r1, d17
; CHECK-NEXT:    mov pc, lr
entry:
  %0 = load i8, i8* %x, align 1
  %1 = insertelement <8 x i8> undef, i8 %0, i32 0
  %lane = shufflevector <8 x i8> %1, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef>
  %2 = load i8, i8* %y, align 1
  %3 = insertelement <8 x i8> undef, i8 %2, i32 0
  %lane3 = shufflevector <8 x i8> %3, <8 x i8> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef>
  %vzip.i = shufflevector <8 x i8> %lane, <8 x i8> %lane3, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
  ret <8 x i8> %vzip.i
}