; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=aarch64--linux-gnu | FileCheck %s

; Codegen test for shufflevector patterns that insert a contiguous subvector
; into a wider vector on AArch64.
;
; Every function produces its result with a shufflevector whose mask takes one
; contiguous run of lanes from the "subvector" operand and all remaining lanes
; from %b:
;   * insert_* functions take the subvector from the low lanes of the register
;     argument %a.
;   * load_* functions load a narrow vector from %a and first widen it with a
;     shufflevector against poison before the inserting shuffle.
;
; For most functions the expected assembly is a full-register copy followed by
; a single lane move (mov vD.h[i] / vD.s[i] / vD.d[i]). The *_15 functions use
; a mask where the run of subvector lanes starts at an offset that is not a
; multiple of the run length; their expected assembly is a tbl with a mask
; loaded from the constant pool.
;
; Function names follow <kind>_<result type>_<subvector lanes>_<suffix>; the
; suffix appears to identify the insertion position (presumably 1-based, with
; 15 marking the misaligned case -- confirm before relying on it).
;
; The leading float argument is unused in every body; presumably it occupies
; the first FP/SIMD argument register so that %b and %a arrive in v1 and v2,
; which is what the expected register names below reflect.
;
; REVIEW: the load_* functions use typed-pointer syntax (e.g. <4 x i8> *%a) and
; undef lanes in shuffle masks. Whether these should become `ptr` / poison
; depends on the LLVM version this tree targets -- confirm before changing.
;
; Do not reorder, add or remove functions without regenerating the assertions:
; the .LCPI<n>_0 constant-pool labels encode the function's index in the file.

; Register-to-register inserts: i8 elements

define <16 x i8> @insert_v16i8_2_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[0], v2.h[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

define <16 x i8> @insert_v16i8_2_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[1], v2.h[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

define <16 x i8> @insert_v16i8_2_6(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_2_6:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.h[6], v2.h[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 30, i32 31>
  ret <16 x i8> %s2
}

define <16 x i8> @insert_v16i8_4_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[0], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Misaligned case: the four %a lanes land at byte offset 2, so no single
; 32-bit lane move applies and a table lookup is expected instead.
define <16 x i8> @insert_v16i8_4_15(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI4_0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $q2_q3
; CHECK-NEXT:    mov v3.16b, v1.16b
; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI4_0]
; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

define <16 x i8> @insert_v16i8_4_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

define <16 x i8> @insert_v16i8_4_3(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[2], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

define <16 x i8> @insert_v16i8_4_4(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_4_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[3], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %s2
}

define <8 x i8> @insert_v8i8_4_1(float %tmp, <8 x i8> %b, <8 x i8> %a) {
; CHECK-LABEL: insert_v8i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %s2
}

define <8 x i8> @insert_v8i8_4_2(float %tmp, <8 x i8> %b, <8 x i8> %a) {
; CHECK-LABEL: insert_v8i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i8> %a, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %s2
}

define <16 x i8> @insert_v16i8_8_1(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_8_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

define <16 x i8> @insert_v16i8_8_2(float %tmp, <16 x i8> %b, <16 x i8> %a) {
; CHECK-LABEL: insert_v16i8_8_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %s2
}

; Register-to-register inserts: i16 elements

define <8 x i16> @insert_v8i16_2_1(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[0], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

; Misaligned case: the two %a lanes land at lane offset 1, so a table lookup
; is expected instead of a 32-bit lane move.
define <8 x i16> @insert_v8i16_2_15(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI13_0
; CHECK-NEXT:    // kill: def $q2 killed $q2 def $q2_q3
; CHECK-NEXT:    mov v3.16b, v1.16b
; CHECK-NEXT:    ldr q0, [x8, :lo12:.LCPI13_0]
; CHECK-NEXT:    tbl v0.16b, { v2.16b, v3.16b }, v0.16b
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

define <8 x i16> @insert_v8i16_2_2(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 0, i32 1, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

define <8 x i16> @insert_v8i16_2_3(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[2], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 14, i32 15>
  ret <8 x i16> %s2
}

define <8 x i16> @insert_v8i16_2_4(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_2_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.s[3], v2.s[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 0, i32 1>
  ret <8 x i16> %s2
}

define <4 x i16> @insert_v4i16_2_1(float %tmp, <4 x i16> %b, <4 x i16> %a) {
; CHECK-LABEL: insert_v4i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %s2 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i16> %s2
}

define <4 x i16> @insert_v4i16_2_2(float %tmp, <4 x i16> %b, <4 x i16> %a) {
; CHECK-LABEL: insert_v4i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    // kill: def $d2 killed $d2 def $q2
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %s2 = shufflevector <4 x i16> %a, <4 x i16> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i16> %s2
}

define <8 x i16> @insert_v8i16_4_1(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

define <8 x i16> @insert_v8i16_4_2(float %tmp, <8 x i16> %b, <8 x i16> %a) {
; CHECK-LABEL: insert_v8i16_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %s2
}

; Register-to-register inserts: i32 elements

define <4 x i32> @insert_v4i32_2_1(float %tmp, <4 x i32> %b, <4 x i32> %a) {
; CHECK-LABEL: insert_v4i32_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v2.16b
; CHECK-NEXT:    mov v0.d[1], v1.d[1]
; CHECK-NEXT:    ret
  %s2 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %s2
}

define <4 x i32> @insert_v4i32_2_2(float %tmp, <4 x i32> %b, <4 x i32> %a) {
; CHECK-LABEL: insert_v4i32_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    mov v0.d[1], v2.d[0]
; CHECK-NEXT:    ret
  %s2 = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i32> %s2
}



; Inserts of a subvector loaded from memory: i8 elements
; (%s1 widens the loaded vector with poison/undef lanes; %s2 does the insert.)

define <16 x i8> @load_v16i8_4_1(float %tmp, <16 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v16i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr s1, [x0]
; CHECK-NEXT:    mov v0.s[0], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

; Misaligned case: the loaded lanes land at byte offset 2; a table lookup is
; expected.
define <16 x i8> @load_v16i8_4_15(float %tmp, <16 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v16i8_4_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    adrp x8, .LCPI24_0
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q0_q1
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ldr q2, [x8, :lo12:.LCPI24_0]
; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v2.16b
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 0, i32 1, i32 2, i32 3, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

define <16 x i8> @load_v16i8_4_2(float %tmp, <16 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v16i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr s1, [x0]
; CHECK-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

define <16 x i8> @load_v16i8_4_3(float %tmp, <16 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v16i8_4_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr s1, [x0]
; CHECK-NEXT:    mov v0.s[2], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

define <16 x i8> @load_v16i8_4_4(float %tmp, <16 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v16i8_4_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr s1, [x0]
; CHECK-NEXT:    mov v0.s[3], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 0, i32 1, i32 2, i32 3>
  ret <16 x i8> %s2
}

define <8 x i8> @load_v8i8_4_1(float %tmp, <8 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v8i8_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    uzp1 v0.8b, v0.8b, v0.8b
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i8> %s1, <8 x i8> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i8> %s2
}

define <8 x i8> @load_v8i8_4_2(float %tmp, <8 x i8> %b, <4 x i8> *%a) {
; CHECK-LABEL: load_v8i8_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldr s0, [x0]
; CHECK-NEXT:    ushll v0.8h, v0.8b, #0
; CHECK-NEXT:    uzp1 v2.8b, v0.8b, v0.8b
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %l = load <4 x i8>, <4 x i8> *%a
  %s1 = shufflevector <4 x i8> %l, <4 x i8> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i8> %s1, <8 x i8> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i8> %s2
}

define <16 x i8> @load_v16i8_8_1(float %tmp, <16 x i8> %b, <8 x i8> *%a) {
; CHECK-LABEL: load_v16i8_8_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <8 x i8>, <8 x i8> *%a
  %s1 = shufflevector <8 x i8> %l, <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
  ret <16 x i8> %s2
}

define <16 x i8> @load_v16i8_8_2(float %tmp, <16 x i8> %b, <8 x i8> *%a) {
; CHECK-LABEL: load_v16i8_8_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <8 x i8>, <8 x i8> *%a
  %s1 = shufflevector <8 x i8> %l, <8 x i8> poison, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <16 x i8> %s1, <16 x i8> %b, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <16 x i8> %s2
}

; Inserts of a subvector loaded from memory: i16 elements

define <8 x i16> @load_v8i16_2_1(float %tmp, <8 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v8i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    ld1 { v2.h }[2], [x8]
; CHECK-NEXT:    xtn v1.4h, v2.4s
; CHECK-NEXT:    mov v0.s[0], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

; Misaligned case: the loaded lanes land at lane offset 1; a table lookup is
; expected.
define <8 x i16> @load_v8i16_2_15(float %tmp, <8 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v8i16_2_15:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    // kill: def $q1 killed $q1 def $q0_q1
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    ld1 { v2.h }[2], [x8]
; CHECK-NEXT:    adrp x8, .LCPI33_0
; CHECK-NEXT:    ldr q3, [x8, :lo12:.LCPI33_0]
; CHECK-NEXT:    xtn v0.4h, v2.4s
; CHECK-NEXT:    tbl v0.16b, { v0.16b, v1.16b }, v3.16b
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 0, i32 1, i32 11, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

define <8 x i16> @load_v8i16_2_2(float %tmp, <8 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v8i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    ld1 { v2.h }[2], [x8]
; CHECK-NEXT:    xtn v1.4h, v2.4s
; CHECK-NEXT:    mov v0.s[1], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 0, i32 1, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

define <8 x i16> @load_v8i16_2_3(float %tmp, <8 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v8i16_2_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    ld1 { v2.h }[2], [x8]
; CHECK-NEXT:    xtn v1.4h, v2.4s
; CHECK-NEXT:    mov v0.s[2], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 14, i32 15>
  ret <8 x i16> %s2
}

define <8 x i16> @load_v8i16_2_4(float %tmp, <8 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v8i16_2_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ldrh w9, [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    fmov s2, w9
; CHECK-NEXT:    ld1 { v2.h }[2], [x8]
; CHECK-NEXT:    xtn v1.4h, v2.4s
; CHECK-NEXT:    mov v0.s[3], v1.s[0]
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 0, i32 1>
  ret <8 x i16> %s2
}

define <4 x i16> @load_v4i16_2_1(float %tmp, <4 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v4i16_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    // kill: def $d1 killed $d1 def $q1
; CHECK-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-NEXT:    uzp1 v0.4h, v0.4h, v0.4h
; CHECK-NEXT:    mov v0.s[1], v1.s[1]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <4 x i16> %s1, <4 x i16> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i16> %s2
}

define <4 x i16> @load_v4i16_2_2(float %tmp, <4 x i16> %b, <2 x i16> *%a) {
; CHECK-LABEL: load_v4i16_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ld1 { v0.h }[0], [x0]
; CHECK-NEXT:    add x8, x0, #2
; CHECK-NEXT:    ld1 { v0.h }[2], [x8]
; CHECK-NEXT:    uzp1 v2.4h, v0.4h, v0.4h
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    mov v0.s[1], v2.s[0]
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %l = load <2 x i16>, <2 x i16> *%a
  %s1 = shufflevector <2 x i16> %l, <2 x i16> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <4 x i16> %s1, <4 x i16> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i16> %s2
}

define <8 x i16> @load_v8i16_4_1(float %tmp, <8 x i16> %b, <4 x i16> *%a) {
; CHECK-LABEL: load_v8i16_4_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <4 x i16>, <4 x i16> *%a
  %s1 = shufflevector <4 x i16> %l, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 12, i32 13, i32 14, i32 15>
  ret <8 x i16> %s2
}

define <8 x i16> @load_v8i16_4_2(float %tmp, <8 x i16> %b, <4 x i16> *%a) {
; CHECK-LABEL: load_v8i16_4_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <4 x i16>, <4 x i16> *%a
  %s1 = shufflevector <4 x i16> %l, <4 x i16> poison, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
  %s2 = shufflevector <8 x i16> %s1, <8 x i16> %b, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 0, i32 1, i32 2, i32 3>
  ret <8 x i16> %s2
}

; Inserts of a subvector loaded from memory: i32 elements

define <4 x i32> @load_v4i32_2_1(float %tmp, <4 x i32> %b, <2 x i32> *%a) {
; CHECK-LABEL: load_v4i32_2_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    ext v1.16b, v1.16b, v1.16b, #8
; CHECK-NEXT:    ldr d0, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <2 x i32>, <2 x i32> *%a
  %s1 = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <4 x i32> %s1, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x i32> %s2
}

define <4 x i32> @load_v4i32_2_2(float %tmp, <4 x i32> %b, <2 x i32> *%a) {
; CHECK-LABEL: load_v4i32_2_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov v0.16b, v1.16b
; CHECK-NEXT:    ldr d1, [x0]
; CHECK-NEXT:    mov v0.d[1], v1.d[0]
; CHECK-NEXT:    ret
  %l = load <2 x i32>, <2 x i32> *%a
  %s1 = shufflevector <2 x i32> %l, <2 x i32> poison, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
  %s2 = shufflevector <4 x i32> %s1, <4 x i32> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i32> %s2
}