; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s --check-prefixes=CHECK,UNIMP
; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s --check-prefixes=CHECK,SIMD-VM

; Exercises lowering of vector construction: the choice between a v128.const
; initializer and a splat initializer, the choice of which value gets
; splatted, and the handling of lanes built from swizzle-style
; extract/insert pairs.
;
; Two configurations are run. The first (+unimplemented-simd128) uses the
; UNIMP expectations, the second (+simd128) uses the SIMD-VM expectations.
; Expectations shared by both runs use the common prefix.

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; Lane 5 of an all-42 constant is overwritten with the argument.
; The UNIMP run expects a v128.const whose lane 5 is 0, followed by a single
; replace_lane. The SIMD-VM run expects an i16x8.splat to appear.
; CHECK-LABEL: same_const_one_replaced_i16x8:
; CHECK-NEXT: .functype same_const_one_replaced_i16x8 (i32) -> (v128)
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
; UNIMP-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
; UNIMP-NEXT: return $pop[[L1]]
; SIMD-VM: i16x8.splat
define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
  %patched = insertelement <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>, i16 %x, i32 5
  ret <8 x i16> %patched
}

; Same shape as above, but every lane of the constant holds a distinct value.
; The UNIMP run still expects v128.const (lane 5 zeroed) plus one replace_lane.
; CHECK-LABEL: different_const_one_replaced_i16x8:
; CHECK-NEXT: .functype different_const_one_replaced_i16x8 (i32) -> (v128)
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
; UNIMP-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
; UNIMP-NEXT: return $pop[[L1]]
; SIMD-VM: i16x8.splat
define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
  %patched = insertelement <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>, i16 %x, i32 5
  ret <8 x i16> %patched
}

; Float variant: lane 2 of an all-42.0 constant is overwritten with the
; argument. 0x1.5p5 in the expected output is 42.0.
; CHECK-LABEL: same_const_one_replaced_f32x4:
; CHECK-NEXT: .functype same_const_one_replaced_f32x4 (f32) -> (v128)
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
; UNIMP-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
; UNIMP-NEXT: return $pop[[L1]]
; SIMD-VM: f32x4.splat
define <4 x float> @same_const_one_replaced_f32x4(float %x) {
  %patched = insertelement <4 x float> <float 42., float 42., float 42., float 42.>, float %x, i32 2
  ret <4 x float> %patched
}

; Float variant with four distinct constant lanes; lane 2 is overwritten.
; CHECK-LABEL: different_const_one_replaced_f32x4:
; CHECK-NEXT: .functype different_const_one_replaced_f32x4 (f32) -> (v128)
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1p0, 0x1p1, 0x0p0, 0x1p2
; UNIMP-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
; UNIMP-NEXT: return $pop[[L1]]
; SIMD-VM: f32x4.splat
define <4 x float> @different_const_one_replaced_f32x4(float %x) {
  %patched = insertelement <4 x float> <float 1., float 2., float 3., float 4.>, float %x, i32 2
  ret <4 x float> %patched
}

; Pure constant with one undef lane, where 3 occurs twice.
; The UNIMP run expects one v128.const with the undef lane printed as 0.
; The SIMD-VM run expects an i32x4.splat to appear.
; CHECK-LABEL: splat_common_const_i32x4:
; CHECK-NEXT: .functype splat_common_const_i32x4 () -> (v128)
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 0, 3, 3, 1
; UNIMP-NEXT: return $pop[[L0]]
; SIMD-VM: i32x4.splat
define <4 x i32> @splat_common_const_i32x4() {
  ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
}

; All lanes come from arguments: %c fills lanes 1, 3, 5 and 6, %b fills
; lanes 0, 4 and 7, and %a fills lane 2. Both runs expect a splat of the
; third argument ($2) followed by replace_lane for the four other lanes.
; CHECK-LABEL: splat_common_arg_i16x8:
; CHECK-NEXT: .functype splat_common_arg_i16x8 (i32, i32, i32) -> (v128)
; CHECK-NEXT: i16x8.splat $push[[L0:[0-9]+]]=, $2
; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 0, $1
; CHECK-NEXT: i16x8.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $0
; CHECK-NEXT: i16x8.replace_lane $push[[L3:[0-9]+]]=, $pop[[L2]], 4, $1
; CHECK-NEXT: i16x8.replace_lane $push[[L4:[0-9]+]]=, $pop[[L3]], 7, $1
; CHECK-NEXT: return $pop[[L4]]
define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) {
  %upto0 = insertelement <8 x i16> undef, i16 %b, i32 0
  %upto1 = insertelement <8 x i16> %upto0, i16 %c, i32 1
  %upto2 = insertelement <8 x i16> %upto1, i16 %a, i32 2
  %upto3 = insertelement <8 x i16> %upto2, i16 %c, i32 3
  %upto4 = insertelement <8 x i16> %upto3, i16 %b, i32 4
  %upto5 = insertelement <8 x i16> %upto4, i16 %c, i32 5
  %upto6 = insertelement <8 x i16> %upto5, i16 %c, i32 6
  %upto7 = insertelement <8 x i16> %upto6, i16 %b, i32 7
  ret <8 x i16> %upto7
}

; Only lane 0 is defined: it is read from %src at the index stored in lane 0
; of %mask. Both runs expect a single v8x16.swizzle of the two arguments.
; CHECK-LABEL: swizzle_one_i8x16:
; CHECK-NEXT: .functype swizzle_one_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
; CHECK-NEXT: return $pop[[L0]]
define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
  %idx0 = extractelement <16 x i8> %mask, i32 0
  %elt0 = extractelement <16 x i8> %src, i8 %idx0
  %acc0 = insertelement <16 x i8> undef, i8 %elt0, i32 0
  ret <16 x i8> %acc0
}

; Every lane i is read from %src at the index stored in lane i of %mask.
; Both runs expect the whole sequence to become one v8x16.swizzle.
; CHECK-LABEL: swizzle_all_i8x16:
; CHECK-NEXT: .functype swizzle_all_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
; CHECK-NEXT: return $pop[[L0]]
define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
  %idx0 = extractelement <16 x i8> %mask, i32 0
  %elt0 = extractelement <16 x i8> %src, i8 %idx0
  %acc0 = insertelement <16 x i8> undef, i8 %elt0, i32 0
  %idx1 = extractelement <16 x i8> %mask, i32 1
  %elt1 = extractelement <16 x i8> %src, i8 %idx1
  %acc1 = insertelement <16 x i8> %acc0, i8 %elt1, i32 1
  %idx2 = extractelement <16 x i8> %mask, i32 2
  %elt2 = extractelement <16 x i8> %src, i8 %idx2
  %acc2 = insertelement <16 x i8> %acc1, i8 %elt2, i32 2
  %idx3 = extractelement <16 x i8> %mask, i32 3
  %elt3 = extractelement <16 x i8> %src, i8 %idx3
  %acc3 = insertelement <16 x i8> %acc2, i8 %elt3, i32 3
  %idx4 = extractelement <16 x i8> %mask, i32 4
  %elt4 = extractelement <16 x i8> %src, i8 %idx4
  %acc4 = insertelement <16 x i8> %acc3, i8 %elt4, i32 4
  %idx5 = extractelement <16 x i8> %mask, i32 5
  %elt5 = extractelement <16 x i8> %src, i8 %idx5
  %acc5 = insertelement <16 x i8> %acc4, i8 %elt5, i32 5
  %idx6 = extractelement <16 x i8> %mask, i32 6
  %elt6 = extractelement <16 x i8> %src, i8 %idx6
  %acc6 = insertelement <16 x i8> %acc5, i8 %elt6, i32 6
  %idx7 = extractelement <16 x i8> %mask, i32 7
  %elt7 = extractelement <16 x i8> %src, i8 %idx7
  %acc7 = insertelement <16 x i8> %acc6, i8 %elt7, i32 7
  %idx8 = extractelement <16 x i8> %mask, i32 8
  %elt8 = extractelement <16 x i8> %src, i8 %idx8
  %acc8 = insertelement <16 x i8> %acc7, i8 %elt8, i32 8
  %idx9 = extractelement <16 x i8> %mask, i32 9
  %elt9 = extractelement <16 x i8> %src, i8 %idx9
  %acc9 = insertelement <16 x i8> %acc8, i8 %elt9, i32 9
  %idx10 = extractelement <16 x i8> %mask, i32 10
  %elt10 = extractelement <16 x i8> %src, i8 %idx10
  %acc10 = insertelement <16 x i8> %acc9, i8 %elt10, i32 10
  %idx11 = extractelement <16 x i8> %mask, i32 11
  %elt11 = extractelement <16 x i8> %src, i8 %idx11
  %acc11 = insertelement <16 x i8> %acc10, i8 %elt11, i32 11
  %idx12 = extractelement <16 x i8> %mask, i32 12
  %elt12 = extractelement <16 x i8> %src, i8 %idx12
  %acc12 = insertelement <16 x i8> %acc11, i8 %elt12, i32 12
  %idx13 = extractelement <16 x i8> %mask, i32 13
  %elt13 = extractelement <16 x i8> %src, i8 %idx13
  %acc13 = insertelement <16 x i8> %acc12, i8 %elt13, i32 13
  %idx14 = extractelement <16 x i8> %mask, i32 14
  %elt14 = extractelement <16 x i8> %src, i8 %idx14
  %acc14 = insertelement <16 x i8> %acc13, i8 %elt14, i32 14
  %idx15 = extractelement <16 x i8> %mask, i32 15
  %elt15 = extractelement <16 x i8> %src, i8 %idx15
  %acc15 = insertelement <16 x i8> %acc14, i8 %elt15, i32 15
  ret <16 x i8> %acc15
}

; The same single-lane pattern with 16-bit lanes. No swizzle instruction may
; appear between the function header and the return.
; CHECK-LABEL: swizzle_one_i16x8:
; CHECK-NEXT: .functype swizzle_one_i16x8 (v128, v128) -> (v128)
; CHECK-NOT: swizzle
; CHECK: return
define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
  %idx0 = extractelement <8 x i16> %mask, i32 0
  %elt0 = extractelement <8 x i16> %src, i16 %idx0
  %acc0 = insertelement <8 x i16> undef, i16 %elt0, i32 0
  ret <8 x i16> %acc0
}

; Mixed sources: two swizzle-style lanes (0 and 7), two lanes of %splatted
; (3 and 12) and two lanes of the constant 42 (4 and 14). Both runs expect
; the swizzle first, followed by four replace_lane instructions.
; CHECK-LABEL: mashup_swizzle_i8x16:
; CHECK-NEXT: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: return
define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; lane 0: swizzle-style lookup
  %idx0 = extractelement <16 x i8> %mask, i32 0
  %elt0 = extractelement <16 x i8> %src, i8 %idx0
  %with0 = insertelement <16 x i8> undef, i8 %elt0, i32 0
  ; lane 7: swizzle-style lookup
  %idx7 = extractelement <16 x i8> %mask, i32 7
  %elt7 = extractelement <16 x i8> %src, i8 %idx7
  %with7 = insertelement <16 x i8> %with0, i8 %elt7, i32 7
  ; lane 3: scalar argument
  %with3 = insertelement <16 x i8> %with7, i8 %splatted, i32 3
  ; lane 12: scalar argument
  %with12 = insertelement <16 x i8> %with3, i8 %splatted, i32 12
  ; lane 4: constant
  %with4 = insertelement <16 x i8> %with12, i8 42, i32 4
  ; lane 14: constant
  %with14 = insertelement <16 x i8> %with4, i8 42, i32 14
  ret <16 x i8> %with14
}

; Mixed sources: one swizzle-style lane (0), two lanes of %splatted (3 and
; 12) and two lanes of the constant 42 (4 and 14). The UNIMP run expects a
; v128.const holding 42 in lanes 4 and 14, followed by three replace_lane
; instructions. The SIMD-VM run expects an i8x16.splat to appear.
; CHECK-LABEL: mashup_const_i8x16:
; CHECK-NEXT: .functype mashup_const_i8x16 (v128, v128, i32) -> (v128)
; UNIMP: v128.const $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
; UNIMP: i8x16.replace_lane
; UNIMP: i8x16.replace_lane
; UNIMP: i8x16.replace_lane
; UNIMP: return
; SIMD-VM: i8x16.splat
define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; lane 0: swizzle-style lookup
  %idx0 = extractelement <16 x i8> %mask, i32 0
  %elt0 = extractelement <16 x i8> %src, i8 %idx0
  %with0 = insertelement <16 x i8> undef, i8 %elt0, i32 0
  ; lane 3: scalar argument
  %with3 = insertelement <16 x i8> %with0, i8 %splatted, i32 3
  ; lane 12: scalar argument
  %with12 = insertelement <16 x i8> %with3, i8 %splatted, i32 12
  ; lane 4: constant
  %with4 = insertelement <16 x i8> %with12, i8 42, i32 4
  ; lane 14: constant
  %with14 = insertelement <16 x i8> %with4, i8 42, i32 14
  ret <16 x i8> %with14
}

; Mixed sources: one swizzle-style lane (0), two lanes of %splatted (3 and
; 12) and one lane of the constant 42 (4). Both runs expect a splat of the
; third argument ($2) followed by two replace_lane instructions.
; CHECK-LABEL: mashup_splat_i8x16:
; CHECK-NEXT: .functype mashup_splat_i8x16 (v128, v128, i32) -> (v128)
; CHECK: i8x16.splat $push[[L0:[0-9]+]]=, $2
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: return
define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; lane 0: swizzle-style lookup
  %idx0 = extractelement <16 x i8> %mask, i32 0
  %elt0 = extractelement <16 x i8> %src, i8 %idx0
  %with0 = insertelement <16 x i8> undef, i8 %elt0, i32 0
  ; lane 3: scalar argument
  %with3 = insertelement <16 x i8> %with0, i8 %splatted, i32 3
  ; lane 12: scalar argument
  %with12 = insertelement <16 x i8> %with3, i8 %splatted, i32 12
  ; lane 4: constant
  %with4 = insertelement <16 x i8> %with12, i8 42, i32 4
  ret <16 x i8> %with4
}

; A single constant (42.0) placed in lane 1 of an otherwise undef vector.
; The UNIMP run expects one v128.const with zeros in the undef lanes.
; The SIMD-VM run expects an f32x4.splat to appear.
; CHECK-LABEL: undef_const_insert_f32x4:
; CHECK-NEXT: .functype undef_const_insert_f32x4 () -> (v128)
; UNIMP-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
; UNIMP-NEXT: return $pop[[L0]]
; SIMD-VM: f32x4.splat
define <4 x float> @undef_const_insert_f32x4() {
  %patched = insertelement <4 x float> undef, float 42., i32 1
  ret <4 x float> %patched
}

; A single argument placed in lane 3 of an otherwise undef vector.
; Both runs expect nothing more than a splat of that argument.
; CHECK-LABEL: undef_arg_insert_i32x4:
; CHECK-NEXT: .functype undef_arg_insert_i32x4 (i32) -> (v128)
; CHECK-NEXT: i32x4.splat $push[[L0:[0-9]+]]=, $0
; CHECK-NEXT: return $pop[[L0]]
define <4 x i32> @undef_arg_insert_i32x4(i32 %x) {
  %patched = insertelement <4 x i32> undef, i32 %x, i32 3
  ret <4 x i32> %patched
}

; An undef scalar inserted into an undef vector. Both runs expect the return
; to follow the function header directly, with no initializer emitted.
; CHECK-LABEL: all_undef_i8x16:
; CHECK-NEXT: .functype all_undef_i8x16 () -> (v128)
; CHECK-NEXT: return $0
define <16 x i8> @all_undef_i8x16() {
  %patched = insertelement <16 x i8> undef, i8 undef, i32 4
  ret <16 x i8> %patched
}

; A literal undef vector is returned. Expectations match the previous case.
; CHECK-LABEL: all_undef_f64x2:
; CHECK-NEXT: .functype all_undef_f64x2 () -> (v128)
; CHECK-NEXT: return $0
define <2 x double> @all_undef_f64x2() {
  ret <2 x double> undef
}