1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc < %s -mattr=+simd128 | FileCheck %s
3
4;; Test that SIMD extending operations can be successfully selected
5
6target datalayout = "e-m:e-p:32:32-i64:64-n32:64-S128"
7target triple = "wasm32-unknown-unknown"
8
;; Sign-extend lanes 0-7 (the low half) of an i8x16 vector to i16x8.
define <8 x i16> @extend_low_i8x16_s(<16 x i8> %v) {
; CHECK-LABEL: extend_low_i8x16_s:
; CHECK:         .functype extend_low_i8x16_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_s
; CHECK-NEXT:    # fallthrough-return
  ; Take the low half of %v; the second shuffle operand is never referenced.
  %lo_half = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %widened = sext <8 x i8> %lo_half to <8 x i16>
  ret <8 x i16> %widened
}
21
;; Zero-extend lanes 0-7 (the low half) of an i8x16 vector to i16x8.
define <8 x i16> @extend_low_i8x16_u(<16 x i8> %v) {
; CHECK-LABEL: extend_low_i8x16_u:
; CHECK:         .functype extend_low_i8x16_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_low_i8x16_u
; CHECK-NEXT:    # fallthrough-return
  ; Take the low half of %v; the second shuffle operand is never referenced.
  %lo_half = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %widened = zext <8 x i8> %lo_half to <8 x i16>
  ret <8 x i16> %widened
}
34
;; Sign-extend lanes 8-15 (the high half) of an i8x16 vector to i16x8.
define <8 x i16> @extend_high_i8x16_s(<16 x i8> %v) {
; CHECK-LABEL: extend_high_i8x16_s:
; CHECK:         .functype extend_high_i8x16_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_high_i8x16_s
; CHECK-NEXT:    # fallthrough-return
  ; The mask selects indices 8-15, i.e. the upper half of %v, so the value is
  ; named %high (it was previously misnamed %low).
  %high = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %extended = sext <8 x i8> %high to <8 x i16>
  ret <8 x i16> %extended
}
47
;; Zero-extend lanes 8-15 (the high half) of an i8x16 vector to i16x8.
define <8 x i16> @extend_high_i8x16_u(<16 x i8> %v) {
; CHECK-LABEL: extend_high_i8x16_u:
; CHECK:         .functype extend_high_i8x16_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extend_high_i8x16_u
; CHECK-NEXT:    # fallthrough-return
  ; The mask selects indices 8-15, i.e. the upper half of %v, so the value is
  ; named %high (it was previously misnamed %low).
  %high = shufflevector <16 x i8> %v, <16 x i8> undef,
           <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %extended = zext <8 x i8> %high to <8 x i16>
  ret <8 x i16> %extended
}
60
;; Sign-extend lanes 0-3 (the low half) of an i16x8 vector to i32x4.
define <4 x i32> @extend_low_i16x8_s(<8 x i16> %v) {
; CHECK-LABEL: extend_low_i16x8_s:
; CHECK:         .functype extend_low_i16x8_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_low_i16x8_s
; CHECK-NEXT:    # fallthrough-return
  ; Take the low half of %v; the second shuffle operand is never referenced.
  %lo_half = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %widened = sext <4 x i16> %lo_half to <4 x i32>
  ret <4 x i32> %widened
}
73
;; Zero-extend lanes 0-3 (the low half) of an i16x8 vector to i32x4.
define <4 x i32> @extend_low_i16x8_u(<8 x i16> %v) {
; CHECK-LABEL: extend_low_i16x8_u:
; CHECK:         .functype extend_low_i16x8_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_low_i16x8_u
; CHECK-NEXT:    # fallthrough-return
  ; Take the low half of %v; the second shuffle operand is never referenced.
  %lo_half = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %widened = zext <4 x i16> %lo_half to <4 x i32>
  ret <4 x i32> %widened
}
86
;; Sign-extend lanes 4-7 (the high half) of an i16x8 vector to i32x4.
define <4 x i32> @extend_high_i16x8_s(<8 x i16> %v) {
; CHECK-LABEL: extend_high_i16x8_s:
; CHECK:         .functype extend_high_i16x8_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_high_i16x8_s
; CHECK-NEXT:    # fallthrough-return
  ; The mask selects indices 4-7, i.e. the upper half of %v, so the value is
  ; named %high (it was previously misnamed %low).
  %high = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = sext <4 x i16> %high to <4 x i32>
  ret <4 x i32> %extended
}
99
;; Zero-extend lanes 4-7 (the high half) of an i16x8 vector to i32x4.
define <4 x i32> @extend_high_i16x8_u(<8 x i16> %v) {
; CHECK-LABEL: extend_high_i16x8_u:
; CHECK:         .functype extend_high_i16x8_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i32x4.extend_high_i16x8_u
; CHECK-NEXT:    # fallthrough-return
  ; The mask selects indices 4-7, i.e. the upper half of %v, so the value is
  ; named %high (it was previously misnamed %low).
  %high = shufflevector <8 x i16> %v, <8 x i16> undef,
           <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %extended = zext <4 x i16> %high to <4 x i32>
  ret <4 x i32> %extended
}
112
;; Sign-extend lanes 0-1 (the low half) of an i32x4 vector to i64x2.
define <2 x i64> @extend_low_i32x4_s(<4 x i32> %v) {
; CHECK-LABEL: extend_low_i32x4_s:
; CHECK:         .functype extend_low_i32x4_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extend_low_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  ; Take the low half of %v; the second shuffle operand is never referenced.
  %lo_half = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %widened = sext <2 x i32> %lo_half to <2 x i64>
  ret <2 x i64> %widened
}
125
;; Zero-extend lanes 0-1 (the low half) of an i32x4 vector to i64x2.
define <2 x i64> @extend_low_i32x4_u(<4 x i32> %v) {
; CHECK-LABEL: extend_low_i32x4_u:
; CHECK:         .functype extend_low_i32x4_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extend_low_i32x4_u
; CHECK-NEXT:    # fallthrough-return
  ; Take the low half of %v; the second shuffle operand is never referenced.
  %lo_half = shufflevector <4 x i32> %v, <4 x i32> undef, <2 x i32> <i32 0, i32 1>
  %widened = zext <2 x i32> %lo_half to <2 x i64>
  ret <2 x i64> %widened
}
138
;; Sign-extend lanes 2-3 (the high half) of an i32x4 vector to i64x2.
define <2 x i64> @extend_high_i32x4_s(<4 x i32> %v) {
; CHECK-LABEL: extend_high_i32x4_s:
; CHECK:         .functype extend_high_i32x4_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extend_high_i32x4_s
; CHECK-NEXT:    # fallthrough-return
  ; The mask selects indices 2-3, i.e. the upper half of %v, so the value is
  ; named %high (it was previously misnamed %low).
  %high = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 2, i32 3>
  %extended = sext <2 x i32> %high to <2 x i64>
  ret <2 x i64> %extended
}
151
;; Zero-extend lanes 2-3 (the high half) of an i32x4 vector to i64x2.
define <2 x i64> @extend_high_i32x4_u(<4 x i32> %v) {
; CHECK-LABEL: extend_high_i32x4_u:
; CHECK:         .functype extend_high_i32x4_u (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i64x2.extend_high_i32x4_u
; CHECK-NEXT:    # fallthrough-return
  ; The mask selects indices 2-3, i.e. the upper half of %v, so the value is
  ; named %high (it was previously misnamed %low).
  %high = shufflevector <4 x i32> %v, <4 x i32> undef,
           <2 x i32> <i32 2, i32 3>
  %extended = zext <2 x i32> %high to <2 x i64>
  ret <2 x i64> %extended
}
164
165;; Also test that similar patterns with offsets not corresponding to
166;; the low or high half are correctly expanded.
167
;; Sign-extend lanes 1-8 of an i8x16 vector to i16x8. The selected window is
;; neither the low nor the high half, so the expected output below builds the
;; result one lane at a time (extract_lane_u / replace_lane) and then
;; sign-extends in place with a shl / shr_s pair by 8.
define <8 x i16> @extend_lowish_i8x16_s(<16 x i8> %v) {
; CHECK-LABEL: extend_lowish_i8x16_s:
; CHECK:         .functype extend_lowish_i8x16_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 1
; CHECK-NEXT:    i16x8.splat
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 2
; CHECK-NEXT:    i16x8.replace_lane 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 3
; CHECK-NEXT:    i16x8.replace_lane 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 4
; CHECK-NEXT:    i16x8.replace_lane 3
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 5
; CHECK-NEXT:    i16x8.replace_lane 4
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 6
; CHECK-NEXT:    i16x8.replace_lane 5
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 7
; CHECK-NEXT:    i16x8.replace_lane 6
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i8x16.extract_lane_u 8
; CHECK-NEXT:    i16x8.replace_lane 7
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i16x8.shl
; CHECK-NEXT:    i32.const 8
; CHECK-NEXT:    i16x8.shr_s
; CHECK-NEXT:    # fallthrough-return
  ; Window starts at lane 1, so it straddles both halves of %v.
  %window = shufflevector <16 x i8> %v, <16 x i8> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  %widened = sext <8 x i8> %window to <8 x i16>
  ret <8 x i16> %widened
}
206
;; Sign-extend lanes 1-4 of an i16x8 vector to i32x4. The selected window is
;; neither the low nor the high half, so the expected output below builds the
;; result one lane at a time (extract_lane_u / replace_lane) and then
;; sign-extends in place with a shl / shr_s pair by 16.
define <4 x i32> @extend_lowish_i16x8_s(<8 x i16> %v) {
; CHECK-LABEL: extend_lowish_i16x8_s:
; CHECK:         .functype extend_lowish_i16x8_s (v128) -> (v128)
; CHECK-NEXT:  # %bb.0:
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extract_lane_u 1
; CHECK-NEXT:    i32x4.splat
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extract_lane_u 2
; CHECK-NEXT:    i32x4.replace_lane 1
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extract_lane_u 3
; CHECK-NEXT:    i32x4.replace_lane 2
; CHECK-NEXT:    local.get 0
; CHECK-NEXT:    i16x8.extract_lane_u 4
; CHECK-NEXT:    i32x4.replace_lane 3
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32x4.shl
; CHECK-NEXT:    i32.const 16
; CHECK-NEXT:    i32x4.shr_s
; CHECK-NEXT:    # fallthrough-return
  ; Window starts at lane 1, so it straddles both halves of %v.
  %window = shufflevector <8 x i16> %v, <8 x i16> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
  %widened = sext <4 x i16> %window to <4 x i32>
  ret <4 x i32> %widened
}
233