1; RUN: llc < %s -asm-verbose=false -verify-machineinstrs -disable-wasm-fallthrough-return-opt -wasm-disable-explicit-locals -mattr=+simd128 | FileCheck %s
2
3; Regression test for an issue with patterns like the following:
4;
5;     t101: v4i32 = BUILD_VECTOR t99, t99, t99, t99
6;         t92: i32 = extract_vector_elt t101, Constant:i32<0>
7;             t89: i32 = sign_extend_inreg t92, ValueType:ch:i8
8;
9; Notice that the sign_extend_inreg has source value type i8 but the
10; extracted vector has type v4i32. There are no ISel patterns that
11; handle mismatched types like this, so we insert a bitcast before the
12; extract. This was previously an ISel failure. This test case is
13; reduced from a private user bug report, and the vector extracts are
14; optimized out via subsequent DAG combines.
15
16target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
17target triple = "wasm32-unknown-unknown"
18
; Test body: loads a <4 x i8> from %p, converts it to <4 x double> with
; sitofp, multiplies by and then adds an all-zero vector, truncates the result
; to <4 x float>, and stores it through an undef pointer.
;
; The expected output below builds an i32x4 from four zero-extending byte
; loads, then reads each element back with i8x16.extract_lane_s at byte lanes
; 0, 4, 8 and 12 (the first byte of each 32-bit lane). Each extracted value
; goes through an f64 convert / mul / add / demote sequence and is placed into
; the f32x4 result, which is written with a single v128.store.
19define void @foo(<4 x i8>* %p) {
20; CHECK-LABEL: foo:
21; CHECK:         .functype foo (i32) -> ()
22; CHECK-NEXT:    i32.load8_u 0
23; CHECK-NEXT:    i32x4.splat
24; CHECK-NEXT:    i32.load8_u 1
25; CHECK-NEXT:    i32x4.replace_lane 1
26; CHECK-NEXT:    i32.const 2
27; CHECK-NEXT:    i32.add
28; CHECK-NEXT:    i32.load8_u 0
29; CHECK-NEXT:    i32x4.replace_lane 2
30; CHECK-NEXT:    i32.const 3
31; CHECK-NEXT:    i32.add
32; CHECK-NEXT:    i32.load8_u 0
33; CHECK-NEXT:    i32x4.replace_lane 3
34; CHECK-NEXT:    local.tee
35; CHECK-NEXT:    i8x16.extract_lane_s 0
36; CHECK-NEXT:    f64.convert_i32_s
37; CHECK-NEXT:    f64.const 0x0p0
38; CHECK-NEXT:    f64.mul
39; CHECK-NEXT:    f64.const 0x0p0
40; CHECK-NEXT:    f64.add
41; CHECK-NEXT:    f32.demote_f64
42; CHECK-NEXT:    f32x4.splat
43; CHECK-NEXT:    i8x16.extract_lane_s 4
44; CHECK-NEXT:    f64.convert_i32_s
45; CHECK-NEXT:    f64.const 0x0p0
46; CHECK-NEXT:    f64.mul
47; CHECK-NEXT:    f64.const 0x0p0
48; CHECK-NEXT:    f64.add
49; CHECK-NEXT:    f32.demote_f64
50; CHECK-NEXT:    f32x4.replace_lane 1
51; CHECK-NEXT:    i8x16.extract_lane_s 8
52; CHECK-NEXT:    f64.convert_i32_s
53; CHECK-NEXT:    f64.const 0x0p0
54; CHECK-NEXT:    f64.mul
55; CHECK-NEXT:    f64.const 0x0p0
56; CHECK-NEXT:    f64.add
57; CHECK-NEXT:    f32.demote_f64
58; CHECK-NEXT:    f32x4.replace_lane 2
59; CHECK-NEXT:    i8x16.extract_lane_s 12
60; CHECK-NEXT:    f64.convert_i32_s
61; CHECK-NEXT:    f64.const 0x0p0
62; CHECK-NEXT:    f64.mul
63; CHECK-NEXT:    f64.const 0x0p0
64; CHECK-NEXT:    f64.add
65; CHECK-NEXT:    f32.demote_f64
66; CHECK-NEXT:    f32x4.replace_lane 3
67; CHECK-NEXT:    v128.store 0
68; CHECK-NEXT:    return
  ; Load the four i8 elements from %p.
69  %1 = load <4 x i8>, <4 x i8>* %p
  ; Signed integer to floating-point conversion of each element.
70  %2 = sitofp <4 x i8> %1 to <4 x double>
  ; Multiply by an all-zero vector, then add an all-zero vector.
71  %3 = fmul <4 x double> zeroinitializer, %2
72  %4 = fadd <4 x double> %3, zeroinitializer
  ; Narrow each element from double to float.
73  %5 = fptrunc <4 x double> %4 to <4 x float>
  ; Store the result through an undef pointer (no real destination is supplied).
74  store <4 x float> %5, <4 x float>* undef
75  ret void
76}
77