; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=arm64-eabi -mcpu=generic -aarch64-neon-syntax=apple -mattr="+fullfp16" | FileCheck %s

; Insert %a into lane 0 of a <4 x float> whose other lanes are 0.0, then store
; the vector through %x.
define void @test0f(float* nocapture %x, float %a) #0 {
; CHECK-LABEL: test0f:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    mov.s v1[0], v0[0]
; CHECK-NEXT:    str q1, [x0]
; CHECK-NEXT:    ret
entry:
  %0 = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float %a, i32 0
  %1 = bitcast float* %x to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 16
  ret void
}

; Same as test0f, but the remaining lanes hold the constant 1.0 instead of 0.0.
define void @test1f(float* nocapture %x, float %a) #0 {
; CHECK-LABEL: test1f:
; CHECK:       // %bb.0: // %entry
; CHECK-NEXT:    fmov.4s v1, #1.00000000
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    mov.s v1[0], v0[0]
; CHECK-NEXT:    str q1, [x0]
; CHECK-NEXT:    ret
entry:
  %0 = insertelement <4 x float> <float undef, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, float %a, i32 0
  %1 = bitcast float* %x to <4 x float>*
  store <4 x float> %0, <4 x float>* %1, align 16
  ret void
}

; Single insert of %a into lane 14 of an all-zero <16 x i8>.
define <16 x i8> @test_insert_v16i8_insert_1(i8 %a) {
; CHECK-LABEL: test_insert_v16i8_insert_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    mov.b v0[14], w0
; CHECK-NEXT:    ret
  %v.0 = insertelement <16 x i8> zeroinitializer, i8 %a, i32 14
  ret <16 x i8> %v.0
}

; Two inserts of %a (lanes 2 and 1) into an all-zero <16 x i8>.
define <16 x i8> @test_insert_v16i8_insert_2(i8 %a) {
; CHECK-LABEL: test_insert_v16i8_insert_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    mov.b v0[1], w0
; CHECK-NEXT:    mov.b v0[2], w0
; CHECK-NEXT:    ret
  %v.0 = insertelement <16 x i8> zeroinitializer, i8 %a, i32 2
  %v.1 = insertelement <16 x i8> %v.0, i8 %a, i32 1
  ret <16 x i8> %v.1
}

; Base vector is undef except for constant zeros in lanes 5 and 9; every other
; lane is overwritten with %a. The insert chain is fully linked (%v.10 builds
; on %v.8), so lane 8 also receives %a rather than being left undef.
define <16 x i8> @test_insert_v16i8_insert_2_undef_base(i8 %a) {
; CHECK-LABEL: test_insert_v16i8_insert_2_undef_base:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup.16b v0, w0
; CHECK-NEXT:    mov.b v0[5], wzr
; CHECK-NEXT:    mov.b v0[9], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef> , i8 %a, i32 0
  %v.1 = insertelement <16 x i8> %v.0, i8 %a, i32 1
  %v.2 = insertelement <16 x i8> %v.1, i8 %a, i32 2
  %v.3 = insertelement <16 x i8> %v.2, i8 %a, i32 3
  %v.4 = insertelement <16 x i8> %v.3, i8 %a, i32 4
  %v.6 = insertelement <16 x i8> %v.4, i8 %a, i32 6
  %v.7 = insertelement <16 x i8> %v.6, i8 %a, i32 7
  %v.8 = insertelement <16 x i8> %v.7, i8 %a, i32 8
  %v.10 = insertelement <16 x i8> %v.8, i8 %a, i32 10
  %v.11 = insertelement <16 x i8> %v.10, i8 %a, i32 11
  %v.12 = insertelement <16 x i8> %v.11, i8 %a, i32 12
  %v.13 = insertelement <16 x i8> %v.12, i8 %a, i32 13
  %v.14 = insertelement <16 x i8> %v.13, i8 %a, i32 14
  %v.15 = insertelement <16 x i8> %v.14, i8 %a, i32 15
  ret <16 x i8> %v.15
}

; Same base vector as above, but lanes 2, 7, 12 and 15 receive %b while the
; remaining non-constant lanes receive %a. The insert chain is fully linked
; (%v.10 builds on %v.8), so lane 8 receives %a rather than being left undef.
define <16 x i8> @test_insert_v16i8_insert_2_undef_base_different_valeus(i8 %a, i8 %b) {
; CHECK-LABEL: test_insert_v16i8_insert_2_undef_base_different_valeus:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup.16b v0, w0
; CHECK-NEXT:    mov.b v0[2], w1
; CHECK-NEXT:    mov.b v0[5], wzr
; CHECK-NEXT:    mov.b v0[7], w1
; CHECK-NEXT:    mov.b v0[9], wzr
; CHECK-NEXT:    mov.b v0[12], w1
; CHECK-NEXT:    mov.b v0[15], w1
; CHECK-NEXT:    ret
  %v.0 = insertelement <16 x i8> <i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 0, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef, i8 undef> , i8 %a, i32 0
  %v.1 = insertelement <16 x i8> %v.0, i8 %a, i32 1
  %v.2 = insertelement <16 x i8> %v.1, i8 %b, i32 2
  %v.3 = insertelement <16 x i8> %v.2, i8 %a, i32 3
  %v.4 = insertelement <16 x i8> %v.3, i8 %a, i32 4
  %v.6 = insertelement <16 x i8> %v.4, i8 %a, i32 6
  %v.7 = insertelement <16 x i8> %v.6, i8 %b, i32 7
  %v.8 = insertelement <16 x i8> %v.7, i8 %a, i32 8
  %v.10 = insertelement <16 x i8> %v.8, i8 %a, i32 10
  %v.11 = insertelement <16 x i8> %v.10, i8 %a, i32 11
  %v.12 = insertelement <16 x i8> %v.11, i8 %b, i32 12
  %v.13 = insertelement <16 x i8> %v.12, i8 %a, i32 13
  %v.14 = insertelement <16 x i8> %v.13, i8 %a, i32 14
  %v.15 = insertelement <16 x i8> %v.14, i8 %b, i32 15
  ret <16 x i8> %v.15
}

; <8 x half> base is undef except for a constant 0.0 in lane 7; lanes 0-6 are
; overwritten with %a.
define <8 x half> @test_insert_v8f16_insert_1(half %a) {
; CHECK-LABEL: test_insert_v8f16_insert_1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    dup.8h v0, v0[0]
; CHECK-NEXT:    mov.h v0[7], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x half> <half undef, half undef, half undef, half undef, half undef, half undef, half undef, half 0.0>, half %a, i32 0
  %v.1 = insertelement <8 x half> %v.0, half %a, i32 1
  %v.2 = insertelement <8 x half> %v.1, half %a, i32 2
  %v.3 = insertelement <8 x half> %v.2, half %a, i32 3
  %v.4 = insertelement <8 x half> %v.3, half %a, i32 4
  %v.5 = insertelement <8 x half> %v.4, half %a, i32 5
  %v.6 = insertelement <8 x half> %v.5, half %a, i32 6
  ret <8 x half> %v.6
}


; Two inserts of %a (lanes 2 and 1) into an all-zero <8 x half>.
define <8 x half> @test_insert_v8f16_insert_2(half %a) {
; CHECK-LABEL: test_insert_v8f16_insert_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    // kill: def $h0 killed $h0 def $q0
; CHECK-NEXT:    mov.h v1[1], v0[0]
; CHECK-NEXT:    mov.h v1[2], v0[0]
; CHECK-NEXT:    mov.16b v0, v1
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x half> zeroinitializer, half %a, i32 2
  %v.1 = insertelement <8 x half> %v.0, half %a, i32 1
  ret <8 x half> %v.1
}

; <8 x i16> base has constant zeros in lanes 3 and 7; the six undef lanes are
; overwritten with %a.
define <8 x i16> @test_insert_v8i16_insert_2(i16 %a) {
; CHECK-LABEL: test_insert_v8i16_insert_2:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup.8h v0, w0
; CHECK-NEXT:    mov.h v0[3], wzr
; CHECK-NEXT:    mov.h v0[7], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x i16> <i16 undef, i16 undef, i16 undef, i16 0, i16 undef, i16 undef, i16 undef, i16 0>, i16 %a, i32 0
  %v.1 = insertelement <8 x i16> %v.0, i16 %a, i32 1
  %v.2 = insertelement <8 x i16> %v.1, i16 %a, i32 2
  %v.3 = insertelement <8 x i16> %v.2, i16 %a, i32 4
  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
  %v.5 = insertelement <8 x i16> %v.4, i16 %a, i32 6
  ret <8 x i16> %v.5
}

; <8 x i16> base has constant zeros in lanes 1, 3 and 7; the five undef lanes
; are overwritten with %a.
define <8 x i16> @test_insert_v8i16_insert_3(i16 %a) {
; CHECK-LABEL: test_insert_v8i16_insert_3:
; CHECK:       // %bb.0:
; CHECK-NEXT:    dup.8h v0, w0
; CHECK-NEXT:    mov.h v0[1], wzr
; CHECK-NEXT:    mov.h v0[3], wzr
; CHECK-NEXT:    mov.h v0[7], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x i16> <i16 undef, i16 0, i16 undef, i16 0, i16 undef, i16 undef, i16 undef, i16 0>, i16 %a, i32 0
  %v.2 = insertelement <8 x i16> %v.0, i16 %a, i32 2
  %v.3 = insertelement <8 x i16> %v.2, i16 %a, i32 4
  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
  %v.5 = insertelement <8 x i16> %v.4, i16 %a, i32 6
  ret <8 x i16> %v.5
}

; <8 x i16> base has constant zeros in lanes 1, 3, 6 and 7; the four undef
; lanes are overwritten with %a.
define <8 x i16> @test_insert_v8i16_insert_4(i16 %a) {
; CHECK-LABEL: test_insert_v8i16_insert_4:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    mov.h v0[0], w0
; CHECK-NEXT:    mov.h v0[2], w0
; CHECK-NEXT:    mov.h v0[4], w0
; CHECK-NEXT:    mov.h v0[5], w0
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x i16> <i16 undef, i16 0, i16 undef, i16 0, i16 undef, i16 undef, i16 0, i16 0>, i16 %a, i32 0
  %v.2 = insertelement <8 x i16> %v.0, i16 %a, i32 2
  %v.3 = insertelement <8 x i16> %v.2, i16 %a, i32 4
  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
  ret <8 x i16> %v.4
}

; <8 x i16> base has constant zeros in lanes 1, 2, 3, 6 and 7; the three undef
; lanes are overwritten with %a.
define <8 x i16> @test_insert_v8i16_insert_5(i16 %a) {
; CHECK-LABEL: test_insert_v8i16_insert_5:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v0, #0000000000000000
; CHECK-NEXT:    mov.h v0[0], w0
; CHECK-NEXT:    mov.h v0[4], w0
; CHECK-NEXT:    mov.h v0[5], w0
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x i16> <i16 undef, i16 0, i16 0, i16 0, i16 undef, i16 undef, i16 0, i16 0>, i16 %a, i32 0
  %v.3 = insertelement <8 x i16> %v.0, i16 %a, i32 4
  %v.4 = insertelement <8 x i16> %v.3, i16 %a, i32 5
  ret <8 x i16> %v.4
}

; Insert %a into the undef lane 1 of a <2 x float> whose lane 0 is 0.0.
define <2 x float> @test_insert_v2f32_undef_zero_vector(float %a) {
; CHECK-LABEL: test_insert_v2f32_undef_zero_vector:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi d1, #0000000000000000
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    mov.s v1[1], v0[0]
; CHECK-NEXT:    fmov d0, d1
; CHECK-NEXT:    ret
  %v.0 = insertelement <2 x float> <float 0.000000e+00, float undef>, float %a, i32 1
  ret <2 x float> %v.0
}

; <4 x float> base is undef except for 0.0 in lane 3; lanes 0-2 receive %a.
define <4 x float> @test_insert_3_f32_undef_zero_vector(float %a) {
; CHECK-LABEL: test_insert_3_f32_undef_zero_vector:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    dup.4s v0, v0[0]
; CHECK-NEXT:    mov.s v0[3], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <4 x float> <float undef, float undef, float undef, float 0.000000e+00>, float %a, i32 0
  %v.1 = insertelement <4 x float> %v.0, float %a, i32 1
  %v.2 = insertelement <4 x float> %v.1, float %a, i32 2
  ret <4 x float> %v.2
}

; Fully undef <4 x float> base; lanes 0-2 receive %a and lane 3 stays undef.
define <4 x float> @test_insert_3_f32_undef(float %a) {
; CHECK-LABEL: test_insert_3_f32_undef:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    dup.4s v0, v0[0]
; CHECK-NEXT:    ret
  %v.0 = insertelement <4 x float> <float undef, float undef, float undef, float undef>, float %a, i32 0
  %v.1 = insertelement <4 x float> %v.0, float %a, i32 1
  %v.2 = insertelement <4 x float> %v.1, float %a, i32 2
  ret <4 x float> %v.2
}

; <4 x float> base alternates undef / 0.0; the undef lanes 0 and 2 receive %a.
define <4 x float> @test_insert_2_f32_undef_zero(float %a) {
; CHECK-LABEL: test_insert_2_f32_undef_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    mov.s v1[0], v0[0]
; CHECK-NEXT:    mov.s v1[2], v0[0]
; CHECK-NEXT:    mov.16b v0, v1
; CHECK-NEXT:    ret
  %v.0 = insertelement <4 x float> <float undef, float 0.000000e+00, float undef, float 0.000000e+00>, float %a, i32 0
  %v.1 = insertelement <4 x float> %v.0, float %a, i32 2
  ret <4 x float> %v.1
}

; Insert %a into the undef lane 0 of a <2 x double> whose lane 1 is 0.0.
define <2 x double> @test_insert_v2f64_undef_insert1(double %a) {
; CHECK-LABEL: test_insert_v2f64_undef_insert1:
; CHECK:       // %bb.0:
; CHECK-NEXT:    movi.2d v1, #0000000000000000
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov.d v1[0], v0[0]
; CHECK-NEXT:    mov.16b v0, v1
; CHECK-NEXT:    ret
  %v.0 = insertelement <2 x double> <double undef, double 0.000000e+00>, double %a, i32 0
  ret <2 x double> %v.0
}

; Insert %a into lanes 0 and 2 of a non-constant vector argument %b.
define <4 x float> @test_insert_2_f32_var(float %a, <4 x float> %b) {
; CHECK-LABEL: test_insert_2_f32_var:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $s0 killed $s0 def $q0
; CHECK-NEXT:    mov.s v1[0], v0[0]
; CHECK-NEXT:    mov.s v1[2], v0[0]
; CHECK-NEXT:    mov.16b v0, v1
; CHECK-NEXT:    ret
  %v.0 = insertelement <4 x float> %b, float %a, i32 0
  %v.1 = insertelement <4 x float> %v.0, float %a, i32 2
  ret <4 x float> %v.1
}

; Insert constant i16 0 into lane 5 of a vector argument.
define <8 x i16> @test_insert_v8i16_i16_zero(<8 x i16> %a) {
; CHECK-LABEL: test_insert_v8i16_i16_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov.h v0[5], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x i16> %a, i16 0, i32 5
  ret <8 x i16> %v.0
}

; TODO: This should just be a mov.s v0[3], wzr
; NOTE(review): the TODO above does not match the function below, which
; inserts half 0.0 into lane 0 of a <4 x half> and is checked as
; mov.h v0[0], wzr. It may be stale or misplaced - confirm.
define <4 x half> @test_insert_v4f16_f16_zero(<4 x half> %a) {
; CHECK-LABEL: test_insert_v4f16_f16_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov.h v0[0], wzr
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %v.0 = insertelement <4 x half> %a, half 0.000000e+00, i32 0
  ret <4 x half> %v.0
}

; Insert constant half 0.0 into lane 6 of a vector argument.
define <8 x half> @test_insert_v8f16_f16_zero(<8 x half> %a) {
; CHECK-LABEL: test_insert_v8f16_f16_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov.h v0[6], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <8 x half> %a, half 0.000000e+00, i32 6
  ret <8 x half> %v.0
}

; Insert constant float 0.0 into lane 0 of a 64-bit vector argument.
define <2 x float> @test_insert_v2f32_f32_zero(<2 x float> %a) {
; CHECK-LABEL: test_insert_v2f32_f32_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    // kill: def $d0 killed $d0 def $q0
; CHECK-NEXT:    mov.s v0[0], wzr
; CHECK-NEXT:    // kill: def $d0 killed $d0 killed $q0
; CHECK-NEXT:    ret
  %v.0 = insertelement <2 x float> %a, float 0.000000e+00, i32 0
  ret <2 x float> %v.0
}

; Insert constant float 0.0 into lane 3 of a vector argument.
define <4 x float> @test_insert_v4f32_f32_zero(<4 x float> %a) {
; CHECK-LABEL: test_insert_v4f32_f32_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov.s v0[3], wzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <4 x float> %a, float 0.000000e+00, i32 3
  ret <4 x float> %v.0
}

; Insert constant double 0.0 into lane 1 of a vector argument.
define <2 x double> @test_insert_v2f64_f64_zero(<2 x double> %a) {
; CHECK-LABEL: test_insert_v2f64_f64_zero:
; CHECK:       // %bb.0:
; CHECK-NEXT:    mov.d v0[1], xzr
; CHECK-NEXT:    ret
  %v.0 = insertelement <2 x double> %a, double 0.000000e+00, i32 1
  ret <2 x double> %v.0
}