; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -wasm-keep-registers -mattr=+unimplemented-simd128 | FileCheck %s

; Test that the logic to choose between v128.const vector
; initialization and splat vector initialization and to optimize the
; choice of splat value works correctly.
;
; The later tests also cover vectors whose lanes are built from
; "src[mask[i]]" extracts (expected to become v8x16.swizzle) and vectors
; that mix swizzle-like, splatted-argument and constant lanes.

target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
target triple = "wasm32-unknown-unknown"

; One variable lane (5) inserted into a constant vector whose lanes are all
; 42. Expected output: a v128.const holding the constants with lane 5 emitted
; as 0, followed by a single replace_lane that writes the argument to lane 5.
; CHECK-LABEL: same_const_one_replaced_i16x8:
; CHECK-NEXT: .functype same_const_one_replaced_i16x8 (i32) -> (v128)
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 42, 42, 42, 42, 42, 0, 42, 42
; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
; CHECK-NEXT: return $pop[[L1]]
define <8 x i16> @same_const_one_replaced_i16x8(i16 %x) {
  %v = insertelement
    <8 x i16> <i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42, i16 42>,
    i16 %x,
    i32 5
  ret <8 x i16> %v
}

; Same shape as the previous test, but every constant lane is different.
; Expected output: the original constants with lane 5 (-6 in the IR) emitted
; as 0, followed by a single replace_lane on lane 5.
; CHECK-LABEL: different_const_one_replaced_i16x8:
; CHECK-NEXT: .functype different_const_one_replaced_i16x8 (i32) -> (v128)
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 1, -2, 3, -4, 5, 0, 7, -8
; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 5, $0
; CHECK-NEXT: return $pop[[L1]]
define <8 x i16> @different_const_one_replaced_i16x8(i16 %x) {
  %v = insertelement
    <8 x i16> <i16 1, i16 -2, i16 3, i16 -4, i16 5, i16 -6, i16 7, i16 -8>,
    i16 %x,
    i32 5
  ret <8 x i16> %v
}

; Floating-point variant: lane 2 of an all-42.0 vector is replaced by the
; argument. 0x1.5p5 is 42.0; the replaced lane is emitted as 0x0p0.
; CHECK-LABEL: same_const_one_replaced_f32x4:
; CHECK-NEXT: .functype same_const_one_replaced_f32x4 (f32) -> (v128)
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1.5p5, 0x1.5p5, 0x0p0, 0x1.5p5
; CHECK-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
; CHECK-NEXT: return $pop[[L1]]
define <4 x float> @same_const_one_replaced_f32x4(float %x) {
  %v = insertelement
    <4 x float> <float 42., float 42., float 42., float 42.>,
    float %x,
    i32 2
  ret <4 x float> %v
}

; Floating-point variant with distinct constants 1.0, 2.0, 3.0, 4.0; lane 2
; (3.0 in the IR) is replaced by the argument and emitted as 0x0p0 in the
; constant.
; CHECK-LABEL: different_const_one_replaced_f32x4:
; CHECK-NEXT: .functype different_const_one_replaced_f32x4 (f32) -> (v128)
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x1p0, 0x1p1, 0x0p0, 0x1p2
; CHECK-NEXT: f32x4.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 2, $0
; CHECK-NEXT: return $pop[[L1]]
define <4 x float> @different_const_one_replaced_f32x4(float %x) {
  %v = insertelement
    <4 x float> <float 1., float 2., float 3., float 4.>,
    float %x,
    i32 2
  ret <4 x float> %v
}

; A fully constant vector with one undef lane. Expected output: a single
; v128.const in which the undef lane 0 is emitted as 0; no splat or
; replace_lane is expected.
; CHECK-LABEL: splat_common_const_i32x4:
; CHECK-NEXT: .functype splat_common_const_i32x4 () -> (v128)
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0, 3, 3, 1
; CHECK-NEXT: return $pop[[L0]]
define <4 x i32> @splat_common_const_i32x4() {
  ret <4 x i32> <i32 undef, i32 3, i32 3, i32 1>
}

; All eight lanes come from arguments: %c fills lanes 1, 3, 5 and 6, %b fills
; lanes 0, 4 and 7, and %a fills lane 2. Expected output: a splat of $2
; followed by replace_lane for lanes 0, 2, 4 and 7, which is consistent with
; the most frequently used argument (%c) being chosen as the splat value.
; CHECK-LABEL: splat_common_arg_i16x8:
; CHECK-NEXT: .functype splat_common_arg_i16x8 (i32, i32, i32) -> (v128)
; CHECK-NEXT: i16x8.splat $push[[L0:[0-9]+]]=, $2
; CHECK-NEXT: i16x8.replace_lane $push[[L1:[0-9]+]]=, $pop[[L0]], 0, $1
; CHECK-NEXT: i16x8.replace_lane $push[[L2:[0-9]+]]=, $pop[[L1]], 2, $0
; CHECK-NEXT: i16x8.replace_lane $push[[L3:[0-9]+]]=, $pop[[L2]], 4, $1
; CHECK-NEXT: i16x8.replace_lane $push[[L4:[0-9]+]]=, $pop[[L3]], 7, $1
; CHECK-NEXT: return $pop[[L4]]
define <8 x i16> @splat_common_arg_i16x8(i16 %a, i16 %b, i16 %c) {
  %v0 = insertelement <8 x i16> undef, i16 %b, i32 0
  %v1 = insertelement <8 x i16> %v0, i16 %c, i32 1
  %v2 = insertelement <8 x i16> %v1, i16 %a, i32 2
  %v3 = insertelement <8 x i16> %v2, i16 %c, i32 3
  %v4 = insertelement <8 x i16> %v3, i16 %b, i32 4
  %v5 = insertelement <8 x i16> %v4, i16 %c, i32 5
  %v6 = insertelement <8 x i16> %v5, i16 %c, i32 6
  %v7 = insertelement <8 x i16> %v6, i16 %b, i32 7
  ret <8 x i16> %v7
}

; Only lane 0 is defined, as src[mask[0]]; the other fifteen lanes stay
; undef. Expected output: a single v8x16.swizzle of the two arguments.
; CHECK-LABEL: swizzle_one_i8x16:
; CHECK-NEXT: .functype swizzle_one_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
; CHECK-NEXT: return $pop[[L0]]
define <16 x i8> @swizzle_one_i8x16(<16 x i8> %src, <16 x i8> %mask) {
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ret <16 x i8> %v0
}

; Every lane i is built as src[mask[i]]: %mN reads mask lane N, %sN reads the
; src lane selected by %mN, and %vN inserts it at lane N. Expected output: the
; whole chain collapses to a single v8x16.swizzle.
; CHECK-LABEL: swizzle_all_i8x16:
; CHECK-NEXT: .functype swizzle_all_i8x16 (v128, v128) -> (v128)
; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
; CHECK-NEXT: return $pop[[L0]]
define <16 x i8> @swizzle_all_i8x16(<16 x i8> %src, <16 x i8> %mask) {
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  %m1 = extractelement <16 x i8> %mask, i32 1
  %s1 = extractelement <16 x i8> %src, i8 %m1
  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 1
  %m2 = extractelement <16 x i8> %mask, i32 2
  %s2 = extractelement <16 x i8> %src, i8 %m2
  %v2 = insertelement <16 x i8> %v1, i8 %s2, i32 2
  %m3 = extractelement <16 x i8> %mask, i32 3
  %s3 = extractelement <16 x i8> %src, i8 %m3
  %v3 = insertelement <16 x i8> %v2, i8 %s3, i32 3
  %m4 = extractelement <16 x i8> %mask, i32 4
  %s4 = extractelement <16 x i8> %src, i8 %m4
  %v4 = insertelement <16 x i8> %v3, i8 %s4, i32 4
  %m5 = extractelement <16 x i8> %mask, i32 5
  %s5 = extractelement <16 x i8> %src, i8 %m5
  %v5 = insertelement <16 x i8> %v4, i8 %s5, i32 5
  %m6 = extractelement <16 x i8> %mask, i32 6
  %s6 = extractelement <16 x i8> %src, i8 %m6
  %v6 = insertelement <16 x i8> %v5, i8 %s6, i32 6
  %m7 = extractelement <16 x i8> %mask, i32 7
  %s7 = extractelement <16 x i8> %src, i8 %m7
  %v7 = insertelement <16 x i8> %v6, i8 %s7, i32 7
  %m8 = extractelement <16 x i8> %mask, i32 8
  %s8 = extractelement <16 x i8> %src, i8 %m8
  %v8 = insertelement <16 x i8> %v7, i8 %s8, i32 8
  %m9 = extractelement <16 x i8> %mask, i32 9
  %s9 = extractelement <16 x i8> %src, i8 %m9
  %v9 = insertelement <16 x i8> %v8, i8 %s9, i32 9
  %m10 = extractelement <16 x i8> %mask, i32 10
  %s10 = extractelement <16 x i8> %src, i8 %m10
  %v10 = insertelement <16 x i8> %v9, i8 %s10, i32 10
  %m11 = extractelement <16 x i8> %mask, i32 11
  %s11 = extractelement <16 x i8> %src, i8 %m11
  %v11 = insertelement <16 x i8> %v10, i8 %s11, i32 11
  %m12 = extractelement <16 x i8> %mask, i32 12
  %s12 = extractelement <16 x i8> %src, i8 %m12
  %v12 = insertelement <16 x i8> %v11, i8 %s12, i32 12
  %m13 = extractelement <16 x i8> %mask, i32 13
  %s13 = extractelement <16 x i8> %src, i8 %m13
  %v13 = insertelement <16 x i8> %v12, i8 %s13, i32 13
  %m14 = extractelement <16 x i8> %mask, i32 14
  %s14 = extractelement <16 x i8> %src, i8 %m14
  %v14 = insertelement <16 x i8> %v13, i8 %s14, i32 14
  %m15 = extractelement <16 x i8> %mask, i32 15
  %s15 = extractelement <16 x i8> %src, i8 %m15
  %v15 = insertelement <16 x i8> %v14, i8 %s15, i32 15
  ret <16 x i8> %v15
}

; Negative test: the same src[mask[0]] pattern, but on 16-bit lanes. The
; expected output must not contain any swizzle instruction before the return.
; CHECK-LABEL: swizzle_one_i16x8:
; CHECK-NEXT: .functype swizzle_one_i16x8 (v128, v128) -> (v128)
; CHECK-NOT: swizzle
; CHECK: return
define <8 x i16> @swizzle_one_i16x8(<8 x i16> %src, <8 x i16> %mask) {
  %m0 = extractelement <8 x i16> %mask, i32 0
  %s0 = extractelement <8 x i16> %src, i16 %m0
  %v0 = insertelement <8 x i16> undef, i16 %s0, i32 0
  ret <8 x i16> %v0
}

; Mixed lanes: two swizzle-like lanes (0 and 7), two lanes holding the
; %splatted argument (3 and 12) and two lanes holding the constant 42 (4 and
; 14). With all three kinds tied at two lanes each, the expected output uses
; the swizzle as the base vector and patches the other four lanes with
; replace_lane.
; CHECK-LABEL: mashup_swizzle_i8x16:
; CHECK-NEXT: .functype mashup_swizzle_i8x16 (v128, v128, i32) -> (v128)
; CHECK-NEXT: v8x16.swizzle $push[[L0:[0-9]+]]=, $0, $1
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: return
define <16 x i8> @mashup_swizzle_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; swizzle 7
  %m1 = extractelement <16 x i8> %mask, i32 7
  %s1 = extractelement <16 x i8> %src, i8 %m1
  %v1 = insertelement <16 x i8> %v0, i8 %s1, i32 7
  ; splat 3
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 3
  ; splat 12
  %v3 = insertelement <16 x i8> %v2, i8 %splatted, i32 12
  ; const 4
  %v4 = insertelement <16 x i8> %v3, i8 42, i32 4
  ; const 14
  %v5 = insertelement <16 x i8> %v4, i8 42, i32 14
  ret <16 x i8> %v5
}

; Mixed lanes: one swizzle-like lane (0), two lanes holding %splatted (3 and
; 12) and two constant lanes (4 and 14). Expected output: a v128.const base
; with 42 in lanes 4 and 14 and 0 elsewhere, then three replace_lane
; instructions for the remaining defined lanes.
; CHECK-LABEL: mashup_const_i8x16:
; CHECK-NEXT: .functype mashup_const_i8x16 (v128, v128, i32) -> (v128)
; CHECK: v128.const $push[[L0:[0-9]+]]=, 0, 0, 0, 0, 42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 42, 0
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: return
define <16 x i8> @mashup_const_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; splat 3
  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
  ; splat 12
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
  ; const 4
  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
  ; const 14
  %v4 = insertelement <16 x i8> %v3, i8 42, i32 14
  ret <16 x i8> %v4
}

; Mixed lanes: one swizzle-like lane (0), two lanes holding %splatted (3 and
; 12) and one constant lane (4). Expected output: a splat of $2 as the base
; vector, then two replace_lane instructions for the swizzle-like and
; constant lanes.
; CHECK-LABEL: mashup_splat_i8x16:
; CHECK-NEXT: .functype mashup_splat_i8x16 (v128, v128, i32) -> (v128)
; CHECK: i8x16.splat $push[[L0:[0-9]+]]=, $2
; CHECK: i8x16.replace_lane
; CHECK: i8x16.replace_lane
; CHECK: return
define <16 x i8> @mashup_splat_i8x16(<16 x i8> %src, <16 x i8> %mask, i8 %splatted) {
  ; swizzle 0
  %m0 = extractelement <16 x i8> %mask, i32 0
  %s0 = extractelement <16 x i8> %src, i8 %m0
  %v0 = insertelement <16 x i8> undef, i8 %s0, i32 0
  ; splat 3
  %v1 = insertelement <16 x i8> %v0, i8 %splatted, i32 3
  ; splat 12
  %v2 = insertelement <16 x i8> %v1, i8 %splatted, i32 12
  ; const 4
  %v3 = insertelement <16 x i8> %v2, i8 42, i32 4
  ret <16 x i8> %v3
}

; A constant (42.0) inserted into lane 1 of an otherwise undef vector.
; Expected output: one v128.const with 0x1.5p5 in lane 1 and 0x0p0 in the
; undef lanes; no replace_lane is expected.
; CHECK-LABEL: undef_const_insert_f32x4:
; CHECK-NEXT: .functype undef_const_insert_f32x4 () -> (v128)
; CHECK-NEXT: v128.const $push[[L0:[0-9]+]]=, 0x0p0, 0x1.5p5, 0x0p0, 0x0p0
; CHECK-NEXT: return $pop[[L0]]
define <4 x float> @undef_const_insert_f32x4() {
  %v = insertelement <4 x float> undef, float 42., i32 1
  ret <4 x float> %v
}

; An argument inserted into lane 3 of an otherwise undef vector. Expected
; output: just a splat of the argument, with no replace_lane.
; CHECK-LABEL: undef_arg_insert_i32x4:
; CHECK-NEXT: .functype undef_arg_insert_i32x4 (i32) -> (v128)
; CHECK-NEXT: i32x4.splat $push[[L0:[0-9]+]]=, $0
; CHECK-NEXT: return $pop[[L0]]
define <4 x i32> @undef_arg_insert_i32x4(i32 %x) {
  %v = insertelement <4 x i32> undef, i32 %x, i32 3
  ret <4 x i32> %v
}

; An undef element inserted into an undef vector. Expected output: no vector
; initialization instruction at all, only the return of $0.
; CHECK-LABEL: all_undef_i8x16:
; CHECK-NEXT: .functype all_undef_i8x16 () -> (v128)
; CHECK-NEXT: return $0
define <16 x i8> @all_undef_i8x16() {
  %v = insertelement <16 x i8> undef, i8 undef, i32 4
  ret <16 x i8> %v
}

; A literal undef vector returned directly. Expected output: no vector
; initialization instruction, only the return of $0.
; CHECK-LABEL: all_undef_f64x2:
; CHECK-NEXT: .functype all_undef_f64x2 () -> (v128)
; CHECK-NEXT: return $0
define <2 x double> @all_undef_f64x2() {
  ret <2 x double> undef
}