; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s

; Test that the logic to choose between v128.const vector
; initialization and splat vector initialization and to optimize the
; choice of splat value works correctly.
;
; The expected-output lines in each function are autogenerated (see the NOTE
; at the top of this file); regenerate them with the script instead of
; editing them by hand.

target triple = "wasm32-unknown-unknown"

; Variable inserted into lane 5 of an all-42 constant vector. Expected output:
; one v128.const with lane 5 zeroed, then a single replace_lane for %x.
define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
; CHECK-LABEL: same_const_one_replaced_i16x8:
; CHECK: .functype same_const_one_replaced_i16x8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: v128.const $push0=, 42, 42, 42, 42, 42, 0, 42, 42
; CHECK-NEXT: i16x8.replace_lane $push1=, $pop0, 5, $0
; CHECK-NEXT: return $pop1
  %v = insertelement
    <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
    i16 %x,
    i32 5
  ret <8 x i16> %v
}

; Same as above, but every constant lane holds a different value. Expected
; output: v128.const with lane 5 zeroed, then a single replace_lane for %x.
define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
; CHECK-LABEL: different_const_one_replaced_i16x8:
; CHECK: .functype different_const_one_replaced_i16x8 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: v128.const $push0=, 1, -2, 3, -4, 5, 0, 7, -8
; CHECK-NEXT: i16x8.replace_lane $push1=, $pop0, 5, $0
; CHECK-NEXT: return $pop1
  %v = insertelement
    <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
    i16 %x,
    i32 5
  ret <8 x i16> %v
}

; Floating-point variant: variable inserted into lane 2 of an all-42.0
; constant vector. Expected output: v128.const with lane 2 zeroed, then a
; single replace_lane for %x.
define <4 x float> @same_const_one_replaced_f32x4(float %x) {
; CHECK-LABEL: same_const_one_replaced_f32x4:
; CHECK: .functype same_const_one_replaced_f32x4 (f32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: v128.const $push0=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
; CHECK-NEXT: f32x4.replace_lane $push1=, $pop0, 2, $0
; CHECK-NEXT: return $pop1
  %v = insertelement
    <4 x float> <float 42., float 42., float 42., float 42.>,
    float %x,
    i32 2
  ret <4 x float> %v
}

; Floating-point variant with distinct constants in every lane. Expected
; output: v128.const with lane 2 zeroed, then a single replace_lane for %x.
define <4 x float> @different_const_one_replaced_f32x4(float %x) {
; CHECK-LABEL: different_const_one_replaced_f32x4:
; CHECK: .functype different_const_one_replaced_f32x4 (f32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: v128.const $push0=, 0x1p0, 0x1p1, 0x0p0, 0x1p2
; CHECK-NEXT: f32x4.replace_lane $push1=, $pop0, 2, $0
; CHECK-NEXT: return $pop1
  %v = insertelement
    <4 x float> <float 1., float 2., float 3., float 4.>,
    float %x,
    i32 2
  ret <4 x float> %v
}

; Fully constant vector whose lane 0 is undef. Expected output: a single
; v128.const in which the undef lane is emitted as 0.
define <4 x i32> @splat_common_const_i32x4() {
; CHECK-LABEL: splat_common_const_i32x4:
; CHECK: .functype splat_common_const_i32x4 () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: v128.const $push0=, 0, 3, 3, 1
; CHECK-NEXT: return $pop0
  ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
}

; Vector built only from arguments: %c fills lanes 1, 3, 5 and 6, %b fills
; lanes 0, 4 and 7, and %a fills lane 2. Expected output: splat of the most
; frequent value (%c, i.e. $2), then one replace_lane per remaining lane.
define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) {
; CHECK-LABEL: splat_common_arg_i16x8:
; CHECK: .functype splat_common_arg_i16x8 (i32, i32, i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i16x8.splat $push0=, $2
; CHECK-NEXT: i16x8.replace_lane $push1=, $pop0, 0, $1
; CHECK-NEXT: i16x8.replace_lane $push2=, $pop1, 2, $0
; CHECK-NEXT: i16x8.replace_lane $push3=, $pop2, 4, $1
; CHECK-NEXT: i16x8.replace_lane $push4=, $pop3, 7, $1
; CHECK-NEXT: return $pop4
  %v0 = insertelement <8 x i16> undef, i16 %b, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %c, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %a, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %c, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %b, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %c, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %c, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %b, i32 7
  ret <8 x i16> %v7
}

; Only lane 0 is defined: it is the element of %src selected by lane 0 of
; %mask. Expected output: a single i8x16.swizzle of %src by %mask.
define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
; CHECK-LABEL: swizzle_one_i8x16:
; CHECK: .functype swizzle_one_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1
; CHECK-NEXT: return $pop0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ret <16 x i8> %v0
}

; Every lane i is the element of %src selected by lane i of %mask. Expected
; output: a single i8x16.swizzle of %src by %mask.
define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
; CHECK-LABEL: swizzle_all_i8x16:
; CHECK: .functype swizzle_all_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1
; CHECK-NEXT: return $pop0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  %m1 = extractelement <16 x i8> %mask, i32 1
  %s1 = extractelement <16 x i8> %src, i8 %m1
  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1
  %m2 = extractelement <16 x i8> %mask, i32 2
  %s2 = extractelement <16 x i8> %src, i8 %m2
  %v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2
  %m3 = extractelement <16 x i8> %mask, i32 3
  %s3 = extractelement <16 x i8> %src, i8 %m3
  %v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3
  %m4 = extractelement <16 x i8> %mask, i32 4
  %s4 = extractelement <16 x i8> %src, i8 %m4
  %v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4
  %m5 = extractelement <16 x i8> %mask, i32 5
  %s5 = extractelement <16 x i8> %src, i8 %m5
  %v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5
  %m6 = extractelement <16 x i8> %mask, i32 6
  %s6 = extractelement <16 x i8> %src, i8 %m6
  %v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6
  %m7 = extractelement <16 x i8> %mask, i32 7
  %s7 = extractelement <16 x i8> %src, i8 %m7
  %v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7
  %m8 = extractelement <16 x i8> %mask, i32 8
  %s8 = extractelement <16 x i8> %src, i8 %m8
  %v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8
  %m9 = extractelement <16 x i8> %mask, i32 9
  %s9 = extractelement <16 x i8> %src, i8 %m9
  %v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9
  %m10 = extractelement <16 x i8> %mask, i32 10
  %s10 = extractelement <16 x i8> %src, i8 %m10
  %v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10
  %m11 = extractelement <16 x i8> %mask, i32 11
  %s11 = extractelement <16 x i8> %src, i8 %m11
  %v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11
  %m12 = extractelement <16 x i8> %mask, i32 12
  %s12 = extractelement <16 x i8> %src, i8 %m12
  %v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12
  %m13 = extractelement <16 x i8> %mask, i32 13
  %s13 = extractelement <16 x i8> %src, i8 %m13
  %v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13
  %m14 = extractelement <16 x i8> %mask, i32 14
  %s14 = extractelement <16 x i8> %src, i8 %m14
  %v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14
  %m15 = extractelement <16 x i8> %mask, i32 15
  %s15 = extractelement <16 x i8> %src, i8 %m15
  %v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15
  ret <16 x i8> %v15
}

; Ensure we don't use swizzle
; Same single-lane pattern as swizzle_one_i8x16 but with 16-bit lanes. The
; expected output instead stores %src to the stack, masks and scales the
; dynamic index, and reloads the element with v128.load16_splat.
define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
; CHECK-LABEL: swizzle_one_i16x8:
; CHECK: .functype swizzle_one_i16x8 (v128, v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get $push7=, __stack_pointer
; CHECK-NEXT: i32.const $push8=, 16
; CHECK-NEXT: i32.sub $push10=, $pop7, $pop8
; CHECK-NEXT: local.tee $push9=, $2=, $pop10
; CHECK-NEXT: v128.store 0($pop9), $0
; CHECK-NEXT: i16x8.extract_lane_u $push0=, $1, 0
; CHECK-NEXT: i32.const $push1=, 7
; CHECK-NEXT: i32.and $push2=, $pop0, $pop1
; CHECK-NEXT: i32.const $push3=, 1
; CHECK-NEXT: i32.shl $push4=, $pop2, $pop3
; CHECK-NEXT: i32.or $push5=, $2, $pop4
; CHECK-NEXT: v128.load16_splat $push6=, 0($pop5)
; CHECK-NEXT: return $pop6
  %m0 = extractelement <8 x i16> %mask, i32 0
  %s0 = extractelement <8 x i16> %src, i16 %m0
  %v0 = insertelement <8 x i16> undef, i16 %s0, i32 0
  ret <8 x i16> %v0
}

; Lanes 1 and 2 come from fixed lanes of %src (lane 2 and lane 0); lanes 0
; and 3 are the constants 0 and 3. Expected output: one i8x16.shuffle for the
; %src lanes, then a replace_lane for each constant lane.
define <4 x i32> @half_shuffle_i32x4(<4 x i32> %src) {
; CHECK-LABEL: half_shuffle_i32x4:
; CHECK: .functype half_shuffle_i32x4 (v128) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i8x16.shuffle $push0=, $0, $0, 0, 0, 0, 0, 8, 9, 10, 11, 0, 1, 2, 3, 0, 0, 0, 0
; CHECK-NEXT: i32.const $push1=, 0
; CHECK-NEXT: i32x4.replace_lane $push2=, $pop0, 0, $pop1
; CHECK-NEXT: i32.const $push3=, 3
; CHECK-NEXT: i32x4.replace_lane $push4=, $pop2, 3, $pop3
; CHECK-NEXT: return $pop4
  %s0 = extractelement <4 x i32> %src, i32 0
  %s2 = extractelement <4 x i32> %src, i32 2
  %v0 = insertelement <4 x i32> undef, i32 0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %s2, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %s0, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 3, i32 3
  ret <4 x i32> %v3
}

; Mixed vector with two swizzle-style lanes (0, 7), two lanes of the same
; argument (3, 12) and two lanes of the same constant (4, 14). Expected
; output: i8x16.swizzle as the base, then replace_lane for the other lanes.
define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
; CHECK-LABEL: mashup_swizzle_i8x16:
; CHECK: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i8x16.swizzle $push0=, $0, $1
; CHECK-NEXT: i8x16.replace_lane $push1=, $pop0, 3, $2
; CHECK-NEXT: i32.const $push2=, 42
; CHECK-NEXT: i8x16.replace_lane $push3=, $pop1, 4, $pop2
; CHECK-NEXT: i8x16.replace_lane $push4=, $pop3, 12, $2
; CHECK-NEXT: i32.const $push6=, 42
; CHECK-NEXT: i8x16.replace_lane $push5=, $pop4, 14, $pop6
; CHECK-NEXT: return $pop5
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; swizzle 7
  %m1 = extractelement <16 x i8> %mask, i32 7
  %s1 = extractelement <16 x i8> %src, i8 %m1
  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7
  ; splat 3
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3
  ; splat 12
  %v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12
  ; const 4
  %v4 = insertelement <16 x i8> %v3, i8 42, i32 4
  ; const 14
  %v5 = insertelement <16 x i8> %v4, i8 42, i32 14
  ret <16 x i8> %v5
}

; Mixed vector with one swizzle-style lane (0), two lanes of the same
; argument (3, 12) and two lanes of the same constant (4, 14). Expected
; output: v128.const as the base, lane 0 loaded from the stack copy of %src
; with v128.load8_lane, then replace_lane for the argument lanes.
define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
; CHECK-LABEL: mashup_const_i8x16:
; CHECK: .functype mashup_const_i8x16 (v128, v128, i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get $push8=, __stack_pointer
; CHECK-NEXT: i32.const $push9=, 16
; CHECK-NEXT: i32.sub $push11=, $pop8, $pop9
; CHECK-NEXT: local.tee $push10=, $3=, $pop11
; CHECK-NEXT: v128.store 0($pop10), $0
; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0
; CHECK-NEXT: i32.const $push1=, 15
; CHECK-NEXT: i32.and $push2=, $pop0, $pop1
; CHECK-NEXT: i32.or $push3=, $3, $pop2
; CHECK-NEXT: v128.const $push4=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
; CHECK-NEXT: v128.load8_lane $push5=, 0($pop3), $pop4, 0
; CHECK-NEXT: i8x16.replace_lane $push6=, $pop5, 3, $2
; CHECK-NEXT: i8x16.replace_lane $push7=, $pop6, 12, $2
; CHECK-NEXT: return $pop7
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; splat 3
  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
  ; splat 12
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
  ; const 4
  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
  ; const 14
  %v4 = insertelement <16 x i8> %v3, i8 42, i32 14
  ret <16 x i8> %v4
}

; Mixed vector with one swizzle-style lane (0), two lanes of the same
; argument (3, 12) and a single constant lane (4). Expected output:
; i8x16.splat of the argument as the base, lane 0 loaded from the stack copy
; of %src with v128.load8_lane, then replace_lane for the constant lane.
define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
; CHECK-LABEL: mashup_splat_i8x16:
; CHECK: .functype mashup_splat_i8x16 (v128, v128, i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: global.get $push8=, __stack_pointer
; CHECK-NEXT: i32.const $push9=, 16
; CHECK-NEXT: i32.sub $push11=, $pop8, $pop9
; CHECK-NEXT: local.tee $push10=, $3=, $pop11
; CHECK-NEXT: v128.store 0($pop10), $0
; CHECK-NEXT: i8x16.extract_lane_u $push0=, $1, 0
; CHECK-NEXT: i32.const $push1=, 15
; CHECK-NEXT: i32.and $push2=, $pop0, $pop1
; CHECK-NEXT: i32.or $push3=, $3, $pop2
; CHECK-NEXT: i8x16.splat $push4=, $2
; CHECK-NEXT: v128.load8_lane $push5=, 0($pop3), $pop4, 0
; CHECK-NEXT: i32.const $push6=, 42
; CHECK-NEXT: i8x16.replace_lane $push7=, $pop5, 4, $pop6
; CHECK-NEXT: return $pop7
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; splat 3
  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
  ; splat 12
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
  ; const 4
  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
  ret <16 x i8> %v3
}

; Constant 42.0 inserted into lane 1 of an undef vector. Expected output: a
; single v128.const with the undef lanes emitted as 0.
define <4 x float> @undef_const_insert_f32x4() {
; CHECK-LABEL: undef_const_insert_f32x4:
; CHECK: .functype undef_const_insert_f32x4 () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: v128.const $push0=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
; CHECK-NEXT: return $pop0
  %v = insertelement <4 x float> undef, float 42., i32 1
  ret <4 x float> %v
}

; Argument inserted into lane 3 of an undef vector. Expected output: a single
; i32x4.splat of the argument, with no replace_lane.
define <4 x i32> @undef_arg_insert_i32x4(i32 %x) {
; CHECK-LABEL: undef_arg_insert_i32x4:
; CHECK: .functype undef_arg_insert_i32x4 (i32) -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: i32x4.splat $push0=, $0
; CHECK-NEXT: return $pop0
  %v = insertelement <4 x i32> undef, i32 %x, i32 3
  ret <4 x i32> %v
}

; Undef element inserted into an undef vector. Expected output: a bare return
; of a register, with no vector-initialization instruction.
define <16 x i8> @all_undef_i8x16() {
; CHECK-LABEL: all_undef_i8x16:
; CHECK: .functype all_undef_i8x16 () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: return $0
  %v = insertelement <16 x i8> undef, i8 undef, i32 4
  ret <16 x i8> %v
}

; Directly returned undef vector. Expected output: a bare return of a
; register, with no vector-initialization instruction.
define <2 x double> @all_undef_f64x2() {
; CHECK-LABEL: all_undef_f64x2:
; CHECK: .functype all_undef_f64x2 () -> (v128)
; CHECK-NEXT: # %bb.0:
; CHECK-NEXT: return $0
  ret <2 x double> undef
}