; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+simd128 | FileCheck %s

; Test that the logic to choose between v128.const vector
; initialization and splat vector initialization and to optimize the
; choice of splat value works correctly.

target triple = "wasm32-unknown-unknown"

; One variable lane (5) inserted into a vector whose lanes are all the
; constant 42. The expected output is one v128.const, with lane 5 written as
; 0, followed by a single replace_lane of the argument.
; CHECK-LABEL: same_const_one_replaced_i16x8:
; CHECK-NEXT: .functype same_const_one_replaced_i16x8 (i32) -> (v128)
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
; CHECK-NEXT: return $pop[[L1]]
define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
  %v = insertelement
    <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
    i16 %x,
    i32 5
  ret <8 x i16> %v
}

; Same shape as the previous test, but the constant lanes all differ. The
; expected output is still a v128.const (lane 5 written as 0) plus one
; replace_lane.
; CHECK-LABEL: different_const_one_replaced_i16x8:
; CHECK-NEXT: .functype different_const_one_replaced_i16x8 (i32) -> (v128)
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
; CHECK-NEXT: return $pop[[L1]]
define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
  %v = insertelement
    <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
    i16 %x,
    i32 5
  ret <8 x i16> %v
}

; Floating-point variant: one variable lane (2) inserted into an all-42.0
; constant vector. Expects a v128.const with lane 2 written as 0x0p0 and a
; single f32x4.replace_lane.
; CHECK-LABEL: same_const_one_replaced_f32x4:
; CHECK-NEXT: .functype same_const_one_replaced_f32x4 (f32) -> (v128)
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
; CHECK-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
; CHECK-NEXT: return $pop[[L1]]
define <4 x float> @same_const_one_replaced_f32x4(float %x) {
  %v = insertelement
    <4 x float> <float 42., float 42., float 42., float 42.>,
    float %x,
    i32 2
  ret <4 x float> %v
}

; Floating-point variant with distinct constant lanes (1.0, 2.0, 3.0, 4.0);
; lane 2 is overwritten by the argument.
; CHECK-LABEL: different_const_one_replaced_f32x4:
; CHECK-NEXT: .functype different_const_one_replaced_f32x4 (f32) -> (v128)
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1p0, 0x1p1, 0x0p0, 0x1p2
; CHECK-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
; CHECK-NEXT: return $pop[[L1]]
define <4 x float> @different_const_one_replaced_f32x4(float %x) {
  %v = insertelement
    <4 x float> <float 1., float 2., float 3., float 4.>,
    float %x,
    i32 2
  ret <4 x float> %v
}

; A fully constant vector with one undef lane. The expected output is a
; single v128.const in which the undef lane is written as 0.
; CHECK-LABEL: splat_common_const_i32x4:
; CHECK-NEXT: .functype splat_common_const_i32x4 () -> (v128)
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0, 3, 3, 1
; CHECK-NEXT: return $pop[[L0]]
define <4 x i32> @splat_common_const_i32x4() {
  ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
}

; All lanes come from arguments: %c fills four lanes (1, 3, 5, 6), %b fills
; three (0, 4, 7) and %a fills one (2). The expected output splats %c ($2)
; and then patches the remaining four lanes with replace_lane.
; CHECK-LABEL: splat_common_arg_i16x8:
; CHECK-NEXT: .functype splat_common_arg_i16x8 (i32, i32, i32) -> (v128)
; CHECK-NEXT: i16x8.splat $push[[L0:[0-9]+]]=, $2
; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 0, $1
; CHECK-NEXT: i16x8.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $0
; CHECK-NEXT: i16x8.replace_lane $push[[L3:[0-9]+]]=, $pop[[L2]], 4, $1
; CHECK-NEXT: i16x8.replace_lane $push[[L4:[0-9]+]]=, $pop[[L3]], 7, $1
; CHECK-NEXT: return $pop[[L4]]
define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) {
  %v0 = insertelement <8 x i16> undef, i16 %b, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %c, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %a, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %c, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %b, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %c, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %c, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %b, i32 7
  ret <8 x i16> %v7
}

; Only lane 0 is defined, as %src indexed by lane 0 of %mask. The expected
; output is a single i8x16.swizzle of the two arguments.
; CHECK-LABEL: swizzle_one_i8x16:
; CHECK-NEXT: .functype swizzle_one_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
; CHECK-NEXT: return $pop[[L0]]
define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ret <16 x i8> %v0
}

; Every lane i is %src indexed by lane i of %mask. The expected output is a
; single i8x16.swizzle of the two arguments.
; CHECK-LABEL: swizzle_all_i8x16:
; CHECK-NEXT: .functype swizzle_all_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
; CHECK-NEXT: return $pop[[L0]]
define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  %m1 = extractelement <16 x i8> %mask, i32 1
  %s1 = extractelement <16 x i8> %src, i8 %m1
  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1
  %m2 = extractelement <16 x i8> %mask, i32 2
  %s2 = extractelement <16 x i8> %src, i8 %m2
  %v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2
  %m3 = extractelement <16 x i8> %mask, i32 3
  %s3 = extractelement <16 x i8> %src, i8 %m3
  %v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3
  %m4 = extractelement <16 x i8> %mask, i32 4
  %s4 = extractelement <16 x i8> %src, i8 %m4
  %v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4
  %m5 = extractelement <16 x i8> %mask, i32 5
  %s5 = extractelement <16 x i8> %src, i8 %m5
  %v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5
  %m6 = extractelement <16 x i8> %mask, i32 6
  %s6 = extractelement <16 x i8> %src, i8 %m6
  %v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6
  %m7 = extractelement <16 x i8> %mask, i32 7
  %s7 = extractelement <16 x i8> %src, i8 %m7
  %v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7
  %m8 = extractelement <16 x i8> %mask, i32 8
  %s8 = extractelement <16 x i8> %src, i8 %m8
  %v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8
  %m9 = extractelement <16 x i8> %mask, i32 9
  %s9 = extractelement <16 x i8> %src, i8 %m9
  %v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9
  %m10 = extractelement <16 x i8> %mask, i32 10
  %s10 = extractelement <16 x i8> %src, i8 %m10
  %v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10
  %m11 = extractelement <16 x i8> %mask, i32 11
  %s11 = extractelement <16 x i8> %src, i8 %m11
  %v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11
  %m12 = extractelement <16 x i8> %mask, i32 12
  %s12 = extractelement <16 x i8> %src, i8 %m12
  %v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12
  %m13 = extractelement <16 x i8> %mask, i32 13
  %s13 = extractelement <16 x i8> %src, i8 %m13
  %v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13
  %m14 = extractelement <16 x i8> %mask, i32 14
  %s14 = extractelement <16 x i8> %src, i8 %m14
  %v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14
  %m15 = extractelement <16 x i8> %mask, i32 15
  %s15 = extractelement <16 x i8> %src, i8 %m15
  %v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15
  ret <16 x i8> %v15
}

; Same pattern as swizzle_one_i8x16 but with 16-bit lanes. The checks require
; that no swizzle instruction appears before the return.
; CHECK-LABEL: swizzle_one_i16x8:
; CHECK-NEXT: .functype swizzle_one_i16x8 (v128, v128) -> (v128)
; CHECK-NOT: swizzle
; CHECK: return
define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
  %m0 = extractelement <8 x i16> %mask, i32 0
  %s0 = extractelement <8 x i16> %src, i16 %m0
  %v0 = insertelement <8 x i16> undef, i16 %s0, i32 0
  ret <8 x i16> %v0
}

; Lanes 1 and 2 are taken from %src (lanes 2 and 0 respectively) while lanes
; 0 and 3 are the constants 0 and 3. The expected output is one i8x16.shuffle
; of %src with itself followed by two i32x4.replace_lane instructions.
; CHECK-LABEL: half_shuffle_i32x4:
; CHECK-NEXT: .functype half_shuffle_i32x4 (v128) -> (v128)
; CHECK: i8x16.shuffle $push[[L0:[0-9]+]]=, $0, $0, 0, 0, 0, 0, 8, 9, 10, 11, 0, 1, 2, 3, 0, 0, 0, 0
; CHECK: i32x4.replace_lane
; CHECK: i32x4.replace_lane
; CHECK: return
define <4 x i32> @half_shuffle_i32x4(<4 x i32> %src) {
  %s0 = extractelement <4 x i32> %src, i32 0
  %s2 = extractelement <4 x i32> %src, i32 2
  %v0 = insertelement <4 x i32> undef, i32 0, i32 0
  %v1 = insertelement <4 x i32> %v0, i32 %s2, i32 1
  %v2 = insertelement <4 x i32> %v1, i32 %s0, i32 2
  %v3 = insertelement <4 x i32> %v2, i32 3, i32 3
  ret <4 x i32> %v3
}

; Mixed sources with two swizzle-style lanes, two lanes of the same argument
; and two lanes of the same constant. The expected output starts with
; i8x16.swizzle and patches the other four lanes with replace_lane.
; CHECK-LABEL: mashup_swizzle_i8x16:
; CHECK-NEXT: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
; CHECK-NEXT: i8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: return
define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; swizzle 7
  %m1 = extractelement <16 x i8> %mask, i32 7
  %s1 = extractelement <16 x i8> %src, i8 %m1
  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7
  ; splat 3
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3
  ; splat 12
  %v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12
  ; const 4
  %v4 = insertelement <16 x i8> %v3, i8 42, i32 4
  ; const 14
  %v5 = insertelement <16 x i8> %v4, i8 42, i32 14
  ret <16 x i8> %v5
}

; Mixed sources with one swizzle-style lane, two lanes of the same argument
; and two lanes of the same constant. The expected output starts from a
; v128.const holding 42 in lanes 4 and 14, followed by a v128.load8_lane and
; two replace_lane instructions.
; CHECK-LABEL: mashup_const_i8x16:
; CHECK-NEXT: .functype mashup_const_i8x16 (v128, v128, i32) -> (v128)
; CHECK: v128.const $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
; CHECK: v128.load8_lane
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: return
define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; splat 3
  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
  ; splat 12
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
  ; const 4
  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
  ; const 14
  %v4 = insertelement <16 x i8> %v3, i8 42, i32 14
  ret <16 x i8> %v4
}

; Mixed sources with one swizzle-style lane, two lanes of the same argument
; and a single constant lane. The expected output starts by splatting the
; argument ($2), followed by a v128.load8_lane and one replace_lane.
; CHECK-LABEL: mashup_splat_i8x16:
; CHECK-NEXT: .functype mashup_splat_i8x16 (v128, v128, i32) -> (v128)
; CHECK: i8x16.splat $push[[L0:[0-9]+]]=, $2
; CHECK: v128.load8_lane
; CHECK: i8x16.replace_lane
; CHECK: return
define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; splat 3
  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
  ; splat 12
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
  ; const 4
  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
  ret <16 x i8> %v3
}

; A single constant (42.0) inserted at lane 1 of an undef vector. The
; expected output is one v128.const in which the undef lanes are written as
; 0x0p0; no splat is expected.
; CHECK-LABEL: undef_const_insert_f32x4:
; CHECK-NEXT: .functype undef_const_insert_f32x4 () -> (v128)
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
; CHECK-NEXT: return $pop[[L0]]
define <4 x float> @undef_const_insert_f32x4() {
  %v = insertelement <4 x float> undef, float 42., i32 1
  ret <4 x float> %v
}

; A single argument inserted at lane 3 of an undef vector. The expected
; output is a splat of the argument with no replace_lane.
; CHECK-LABEL: undef_arg_insert_i32x4:
; CHECK-NEXT: .functype undef_arg_insert_i32x4 (i32) -> (v128)
; CHECK-NEXT: i32x4.splat $push[[L0:[0-9]+]]=, $0
; CHECK-NEXT: return $pop[[L0]]
define <4 x i32> @undef_arg_insert_i32x4(i32 %x) {
  %v = insertelement <4 x i32> undef, i32 %x, i32 3
  ret <4 x i32> %v
}

; An undef element inserted into an undef vector. The expected output has no
; vector-building instruction at all, only the return.
; CHECK-LABEL: all_undef_i8x16:
; CHECK-NEXT: .functype all_undef_i8x16 () -> (v128)
; CHECK-NEXT: return $0
define <16 x i8> @all_undef_i8x16() {
  %v = insertelement <16 x i8> undef, i8 undef, i32 4
  ret <16 x i8> %v
}

; An undef vector returned directly. The expected output is only the return.
; CHECK-LABEL: all_undef_f64x2:
; CHECK-NEXT: .functype all_undef_f64x2 () -> (v128)
; CHECK-NEXT: return $0
define <2 x double> @all_undef_f64x2() {
  ret <2 x double> undef
}