; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes=CHECK,UNIMP
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes=CHECK,SIMD-VM

; Test that the logic to choose between v128.const vector
; initialization and splat vector initialization and to optimize the
; choice of splat value works correctly.
;
; The file is compiled twice. The first invocation enables
; +unimplemented-simd128 and its expectations mostly look for v128.const; the
; second enables only +simd128 and its expectations look for a splat of a
; scalar constant (optionally followed by replace_lane) instead. Expectations
; under the shared prefix apply to both invocations.

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; Constant <1, 2, 1, 2>: both 64-bit halves are identical, so the +simd128
; expectation is a single i64x2.splat of 8589934593 == (2 << 32) | 1.
; CHECK-LABEL: emulated_const_trivial_splat:
; CHECK-NEXT: .functype emulated_const_trivial_splat () -> (v128)
; SIMD-VM-NEXT: i64.const $push0=, 8589934593
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
; SIMD-VM-NEXT: return $pop1
; UNIMP: v128.const
define <4 x i32> @emulated_const_trivial_splat() {
  ret <4 x i32> <i32 1, i32 2, i32 1, i32 2>
}

; Constant <1, 2, undef, 2>: the first 64-bit half is fully defined and the
; second half has an undef lane; the expected splat value is taken from <1, 2>.
; CHECK-LABEL: emulated_const_first_sufficient:
; CHECK-NEXT: .functype emulated_const_first_sufficient () -> (v128)
; SIMD-VM-NEXT: i64.const $push0=, 8589934593
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
; SIMD-VM-NEXT: return $pop1
; UNIMP: v128.const
define <4 x i32> @emulated_const_first_sufficient() {
  ret <4 x i32> <i32 1, i32 2, i32 undef, i32 2>
}

; Constant <1, undef, 1, 2>: here the second 64-bit half is the fully defined
; one; the expected splat value is still (2 << 32) | 1.
; CHECK-LABEL: emulated_const_second_sufficient:
; CHECK-NEXT: .functype emulated_const_second_sufficient () -> (v128)
; SIMD-VM-NEXT: i64.const $push0=, 8589934593
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
; SIMD-VM-NEXT: return $pop1
; UNIMP: v128.const
define <4 x i32> @emulated_const_second_sufficient() {
  ret <4 x i32> <i32 1, i32 undef, i32 1, i32 2>
}

; Constant <1, undef, undef, 2>: neither half is fully defined, but the defined
; lanes of the two halves do not conflict, so the expected splat value combines
; them into (2 << 32) | 1.
; CHECK-LABEL: emulated_const_combined_sufficient:
; CHECK-NEXT: .functype emulated_const_combined_sufficient () -> (v128)
; SIMD-VM-NEXT: i64.const $push0=, 8589934593
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
; SIMD-VM-NEXT: return $pop1
; UNIMP: v128.const
define <4 x i32> @emulated_const_combined_sufficient() {
  ret <4 x i32> <i32 1, i32 undef, i32 1, i32 undef>
}

; Constant <1, 2, undef, 4>: the defined high lanes (2 vs. 4) conflict, so a
; single splat cannot cover both halves. The +simd128 expectation is a splat of
; (2 << 32) | 1 followed by replacing i64 lane 1 with 17179869184 == 4 << 32.
; CHECK-LABEL: emulated_const_neither_sufficient:
; CHECK-NEXT: .functype emulated_const_neither_sufficient () -> (v128)
; SIMD-VM-NEXT: i64.const $push0=, 8589934593
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
; SIMD-VM-NEXT: i64.const $push2=, 17179869184
; SIMD-VM-NEXT: i64x2.replace_lane $push3=, $pop1, 1, $pop2
; SIMD-VM-NEXT: return $pop3
define <4 x i32> @emulated_const_neither_sufficient() {
  ret <4 x i32> <i32 1, i32 2, i32 undef, i32 4>
}

; 16 x i8 variant of the "combined" case: the first half defines the even byte
; lanes (0, 2, 4, 6) and the second half the odd ones (1, 3, 5, 7). The
; expected splat value 506097522914230528 == 0x0706050403020100 merges both.
; CHECK-LABEL: emulated_const_combined_sufficient_large:
; CHECK-NEXT: .functype emulated_const_combined_sufficient_large () -> (v128)
; SIMD-VM-NEXT: i64.const $push0=, 506097522914230528
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
; SIMD-VM-NEXT: return $pop1
define <16 x i8> @emulated_const_combined_sufficient_large() {
  ret <16 x i8> <i8 0, i8 undef, i8 2, i8 undef, i8 4, i8 undef, i8 6, i8 undef,
                 i8 undef, i8 1, i8 undef, i8 3, i8 undef, i8 5, i8 undef, i8 7>
}

; 16 x i8 variant of the "neither" case: byte 7 of the first half is 255 while
; byte 7 of the second half is 7, so the halves conflict. Expected values:
;   -70368726997663744 == 0xFF06000400020000 (first half, undef bytes as 0)
;   504408655873966336 == 0x0700050003000100 (second half, undef bytes as 0)
; CHECK-LABEL: emulated_const_neither_sufficient_large:
; CHECK-NEXT: .functype emulated_const_neither_sufficient_large () -> (v128)
; SIMD-VM-NEXT: i64.const $push0=, -70368726997663744
; SIMD-VM-NEXT: i64x2.splat $push1=, $pop0
; SIMD-VM-NEXT: i64.const $push2=, 504408655873966336
; SIMD-VM-NEXT: i64x2.replace_lane $push3=, $pop1, 1, $pop2
; SIMD-VM-NEXT: return $pop3
define <16 x i8> @emulated_const_neither_sufficient_large() {
  ret <16 x i8> <i8 0, i8 undef, i8 2, i8 undef, i8 4, i8 undef, i8 6, i8 255,
                 i8 undef, i8 1, i8 undef, i8 3, i8 undef, i8 5, i8 undef, i8 7>
}

; Inserts argument %x into lane 5 of an all-42 constant. With
; +unimplemented-simd128 the expected base is a v128.const whose lane 5 is 0,
; followed by a replace_lane of lane 5 with $0. With +simd128 the test only
; requires that an i64x2.splat appears.
; CHECK-LABEL: same_const_one_replaced_i16x8:
; CHECK-NEXT: .functype same_const_one_replaced_i16x8 (i32) -> (v128)
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
; UNIMP-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
; UNIMP-NEXT: return $pop[[L1]]
; SIMD-VM: i64x2.splat
define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
  %v = insertelement
    <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
    i16 %x,
    i32 5
  ret <8 x i16> %v
}

; Same shape as the test above, but every constant lane is different. The
; replaced lane (5, originally -6) again shows up as 0 in the expected
; v128.const.
; CHECK-LABEL: different_const_one_replaced_i16x8:
; CHECK-NEXT: .functype different_const_one_replaced_i16x8 (i32) -> (v128)
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
; UNIMP-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
; UNIMP-NEXT: return $pop[[L1]]
; SIMD-VM: i64x2.splat
define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
  %v = insertelement
    <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
    i16 %x,
    i32 5
  ret <8 x i16> %v
}

; Float version: inserts %x into lane 2 of an all-42.0 constant
; (0x1.5p5 == 1.3125 * 32 == 42.0). The replaced lane is 0x0p0 in the expected
; v128.const. With +simd128 the test only requires an f32x4.splat to appear.
; CHECK-LABEL: same_const_one_replaced_f32x4:
; CHECK-NEXT: .functype same_const_one_replaced_f32x4 (f32) -> (v128)
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
; UNIMP-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
; UNIMP-NEXT: return $pop[[L1]]
; SIMD-VM: f32x4.splat
define <4 x float> @same_const_one_replaced_f32x4(float %x) {
  %v = insertelement
    <4 x float> <float 42., float 42., float 42., float 42.>,
    float %x,
    i32 2
  ret <4 x float> %v
}

; Float version with distinct constant lanes <1.0, 2.0, 3.0, 4.0>; lane 2 is
; replaced by %x and appears as 0x0p0 in the expected v128.const.
; CHECK-LABEL: different_const_one_replaced_f32x4:
; CHECK-NEXT: .functype different_const_one_replaced_f32x4 (f32) -> (v128)
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1p0, 0x1p1, 0x0p0, 0x1p2
; UNIMP-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
; UNIMP-NEXT: return $pop[[L1]]
; SIMD-VM: f32x4.splat
define <4 x float> @different_const_one_replaced_f32x4(float %x) {
  %v = insertelement
    <4 x float> <float 1., float 2., float 3., float 4.>,
    float %x,
    i32 2
  ret <4 x float> %v
}

; Constant <undef, 3, 3, 1>: with +unimplemented-simd128 the whole vector is
; expected as one v128.const with the undef lane printed as 0. With +simd128
; the test only requires an i64x2.splat to appear.
; CHECK-LABEL: splat_common_const_i32x4:
; CHECK-NEXT: .functype splat_common_const_i32x4 () -> (v128)
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 0, 3, 3, 1
; UNIMP-NEXT: return $pop[[L0]]
; SIMD-VM: i64x2.splat
define <4 x i32> @splat_common_const_i32x4() {
  ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
}

; Builds a vector from three arguments: %c fills lanes 1, 3, 5, 6 (four lanes),
; %b fills lanes 0, 4, 7 and %a fills lane 2. The expected code splats the most
; frequent value ($2, i.e. %c) and then patches the remaining four lanes with
; replace_lane.
; CHECK-LABEL: splat_common_arg_i16x8:
; CHECK-NEXT: .functype splat_common_arg_i16x8 (i32, i32, i32) -> (v128)
; CHECK-NEXT: i16x8.splat $push[[L0:[0-9]+]]=, $2
; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 0, $1
; CHECK-NEXT: i16x8.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $0
; CHECK-NEXT: i16x8.replace_lane $push[[L3:[0-9]+]]=, $pop[[L2]], 4, $1
; CHECK-NEXT: i16x8.replace_lane $push[[L4:[0-9]+]]=, $pop[[L3]], 7, $1
; CHECK-NEXT: return $pop[[L4]]
define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) {
  %v0 = insertelement <8 x i16> undef, i16 %b, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %c, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %a, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %c, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %b, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %c, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %c, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %b, i32 7
  ret <8 x i16> %v7
}

; A single lane built as src[mask[0]] with all other lanes undef; the expected
; output in both configurations is one i8x16.swizzle of the two arguments.
; CHECK-LABEL: swizzle_one_i8x16:
; CHECK-NEXT: .functype swizzle_one_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
; CHECK-NEXT: return $pop[[L0]]
define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ret <16 x i8> %v0
}

; All sixteen lanes built as result[i] = src[mask[i]]; the expected output is
; still exactly one i8x16.swizzle.
; CHECK-LABEL: swizzle_all_i8x16:
; CHECK-NEXT: .functype swizzle_all_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
; CHECK-NEXT: return $pop[[L0]]
define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  %m1 = extractelement <16 x i8> %mask, i32 1
  %s1 = extractelement <16 x i8> %src, i8 %m1
  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1
  %m2 = extractelement <16 x i8> %mask, i32 2
  %s2 = extractelement <16 x i8> %src, i8 %m2
  %v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2
  %m3 = extractelement <16 x i8> %mask, i32 3
  %s3 = extractelement <16 x i8> %src, i8 %m3
  %v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3
  %m4 = extractelement <16 x i8> %mask, i32 4
  %s4 = extractelement <16 x i8> %src, i8 %m4
  %v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4
  %m5 = extractelement <16 x i8> %mask, i32 5
  %s5 = extractelement <16 x i8> %src, i8 %m5
  %v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5
  %m6 = extractelement <16 x i8> %mask, i32 6
  %s6 = extractelement <16 x i8> %src, i8 %m6
  %v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6
  %m7 = extractelement <16 x i8> %mask, i32 7
  %s7 = extractelement <16 x i8> %src, i8 %m7
  %v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7
  %m8 = extractelement <16 x i8> %mask, i32 8
  %s8 = extractelement <16 x i8> %src, i8 %m8
  %v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8
  %m9 = extractelement <16 x i8> %mask, i32 9
  %s9 = extractelement <16 x i8> %src, i8 %m9
  %v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9
  %m10 = extractelement <16 x i8> %mask, i32 10
  %s10 = extractelement <16 x i8> %src, i8 %m10
  %v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10
  %m11 = extractelement <16 x i8> %mask, i32 11
  %s11 = extractelement <16 x i8> %src, i8 %m11
  %v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11
  %m12 = extractelement <16 x i8> %mask, i32 12
  %s12 = extractelement <16 x i8> %src, i8 %m12
  %v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12
  %m13 = extractelement <16 x i8> %mask, i32 13
  %s13 = extractelement <16 x i8> %src, i8 %m13
  %v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13
  %m14 = extractelement <16 x i8> %mask, i32 14
  %s14 = extractelement <16 x i8> %src, i8 %m14
  %v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14
  %m15 = extractelement <16 x i8> %mask, i32 15
  %s15 = extractelement <16 x i8> %src, i8 %m15
  %v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15
  ret <16 x i8> %v15
}

; Same src[mask[0]] pattern with 16-bit lanes. This is a negative test: no
; swizzle instruction may appear between the .functype line and the return.
; CHECK-LABEL: swizzle_one_i16x8:
; CHECK-NEXT: .functype swizzle_one_i16x8 (v128, v128) -> (v128)
; CHECK-NOT: swizzle
; CHECK: return
define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
  %m0 = extractelement <8 x i16> %mask, i32 0
  %s0 = extractelement <8 x i16> %src, i16 %m0
  %v0 = insertelement <8 x i16> undef, i16 %s0, i32 0
  ret <8 x i16> %v0
}

; Mixed build vector with two swizzled lanes (0, 7), two lanes holding the
; %splatted argument (3, 12) and two lanes holding the constant 42 (4, 14).
; The expected code starts from the swizzle and fixes up the other four lanes
; with replace_lane.
; CHECK-LABEL: mashup_swizzle_i8x16:
; CHECK-NEXT: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: return
define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; swizzle 7
  %m1 = extractelement <16 x i8> %mask, i32 7
  %s1 = extractelement <16 x i8> %src, i8 %m1
  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7
  ; splat 3
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3
  ; splat 12
  %v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12
  ; const 4
  %v4 = insertelement <16 x i8> %v3, i8 42, i32 4
  ; const 14
  %v5 = insertelement <16 x i8> %v4, i8 42, i32 14
  ret <16 x i8> %v5
}

; Mixed build vector with one swizzled lane (0), two %splatted lanes (3, 12)
; and two constant-42 lanes (4, 14). With +unimplemented-simd128 the expected
; base is a v128.const carrying 42 in lanes 4 and 14, followed by three
; replace_lane fix-ups. With +simd128 the test only requires an i64x2.splat.
; CHECK-LABEL: mashup_const_i8x16:
; CHECK-NEXT: .functype mashup_const_i8x16 (v128, v128, i32) -> (v128)
; UNIMP: v128.const $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
; UNIMP: i8x16.replace_lane
; UNIMP: i8x16.replace_lane
; UNIMP: i8x16.replace_lane
; UNIMP: return
; SIMD-VM: i64x2.splat
define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; splat 3
  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
  ; splat 12
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
  ; const 4
  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
  ; const 14
  %v4 = insertelement <16 x i8> %v3, i8 42, i32 14
  ret <16 x i8> %v4
}

; Mixed build vector with one swizzled lane (0), two %splatted lanes (3, 12)
; and one constant-42 lane (4). The expected code splats $2 (%splatted) and
; patches the other two lanes with replace_lane.
; CHECK-LABEL: mashup_splat_i8x16:
; CHECK-NEXT: .functype mashup_splat_i8x16 (v128, v128, i32) -> (v128)
; CHECK: i8x16.splat $push[[L0:[0-9]+]]=, $2
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: return
define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; splat 3
  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
  ; splat 12
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
  ; const 4
  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
  ret <16 x i8> %v3
}

; Inserts the constant 42.0 into lane 1 of an undef vector. With
; +unimplemented-simd128 the expectation is a v128.const with 0x0p0 in the
; undef lanes; with +simd128 the test only requires an f32x4.splat.
; CHECK-LABEL: undef_const_insert_f32x4:
; CHECK-NEXT: .functype undef_const_insert_f32x4 () -> (v128)
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
; UNIMP-NEXT: return $pop[[L0]]
; SIMD-VM: f32x4.splat
define <4 x float> @undef_const_insert_f32x4() {
  %v = insertelement <4 x float> undef, float 42., i32 1
  ret <4 x float> %v
}

; Inserts argument %x into lane 3 of an undef vector; since every other lane
; is undef, the expected code in both configurations is a plain splat of $0.
; CHECK-LABEL: undef_arg_insert_i32x4:
; CHECK-NEXT: .functype undef_arg_insert_i32x4 (i32) -> (v128)
; CHECK-NEXT: i32x4.splat $push[[L0:[0-9]+]]=, $0
; CHECK-NEXT: return $pop[[L0]]
define <4 x i32> @undef_arg_insert_i32x4(i32 %x) {
  %v = insertelement <4 x i32> undef, i32 %x, i32 3
  ret <4 x i32> %v
}

; Inserting undef into an undef vector: no vector-building instruction is
; expected, only a return directly after the .functype line.
; CHECK-LABEL: all_undef_i8x16:
; CHECK-NEXT: .functype all_undef_i8x16 () -> (v128)
; CHECK-NEXT: return $0
define <16 x i8> @all_undef_i8x16() {
  %v = insertelement <16 x i8> undef, i8 undef, i32 4
  ret <16 x i8> %v
}

; Returning a literal undef vector: likewise only a return is expected.
; CHECK-LABEL: all_undef_f64x2:
; CHECK-NEXT: .functype all_undef_f64x2 () -> (v128)
; CHECK-NEXT: return $0
define <2 x double> @all_undef_f64x2() {
  ret <2 x double> undef
}