1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=arm64-apple-ios -o - %s | FileCheck %s
3
; Selects between %op and zero under an unsigned compare of %a against a splat
; of 10. %a itself is never extended in the IR; only the <16 x i1> compare
; result has to be widened to match the <16 x i32> select operands.
; The expected output keeps the compare on the byte lanes (cmhi.16b) and
; sign-extends the resulting mask (sshll/sshll2) before and'ing it with %op.
4define <16 x i32> @no_existing_zext(<16 x i8> %a, <16 x i32> %op) {
5; CHECK-LABEL: no_existing_zext:
6; CHECK:       ; %bb.0: ; %entry
7; CHECK-NEXT:    movi.16b v5, #10
8; CHECK-NEXT:    cmhi.16b v0, v0, v5
9; CHECK-NEXT:    sshll.8h v5, v0, #0
10; CHECK-NEXT:    sshll2.8h v0, v0, #0
11; CHECK-NEXT:    sshll.4s v6, v5, #0
12; CHECK-NEXT:    sshll.4s v7, v0, #0
13; CHECK-NEXT:    sshll2.4s v0, v0, #0
14; CHECK-NEXT:    sshll2.4s v5, v5, #0
15; CHECK-NEXT:    and.16b v4, v4, v0
16; CHECK-NEXT:    and.16b v5, v2, v5
17; CHECK-NEXT:    and.16b v2, v3, v7
18; CHECK-NEXT:    and.16b v0, v1, v6
19; CHECK-NEXT:    mov.16b v1, v5
20; CHECK-NEXT:    mov.16b v3, v4
21; CHECK-NEXT:    ret
22entry:
23  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
24  %sel = select <16 x i1> %cmp, <16 x i32> %op, <16 x i32> zeroinitializer
25  ret <16 x i32> %sel
26}
27
; The zext of %a feeds the select, but the compare is signed (sgt) and its
; second operand is the function argument %b rather than a constant splat.
; The expected output performs the compare on the original byte lanes
; (cmgt.16b), widens the mask with sshll and the data with ushll, then and's
; the two together.
28define <16 x i32> @second_compare_operand_not_splat(<16 x i8> %a, <16 x i8> %b) {
29; CHECK-LABEL: second_compare_operand_not_splat:
30; CHECK:       ; %bb.0: ; %entry
31; CHECK-NEXT:    ushll.8h v2, v0, #0
32; CHECK-NEXT:    ushll2.8h v3, v0, #0
33; CHECK-NEXT:    cmgt.16b v0, v0, v1
34; CHECK-NEXT:    ushll.4s v4, v2, #0
35; CHECK-NEXT:    ushll.4s v5, v3, #0
36; CHECK-NEXT:    ushll2.4s v1, v2, #0
37; CHECK-NEXT:    ushll2.4s v2, v3, #0
38; CHECK-NEXT:    sshll.8h v3, v0, #0
39; CHECK-NEXT:    sshll2.8h v0, v0, #0
40; CHECK-NEXT:    sshll.4s v6, v3, #0
41; CHECK-NEXT:    sshll.4s v7, v0, #0
42; CHECK-NEXT:    sshll2.4s v0, v0, #0
43; CHECK-NEXT:    sshll2.4s v16, v3, #0
44; CHECK-NEXT:    and.16b v3, v2, v0
45; CHECK-NEXT:    and.16b v1, v1, v16
46; CHECK-NEXT:    and.16b v2, v5, v7
47; CHECK-NEXT:    and.16b v0, v4, v6
48; CHECK-NEXT:    ret
49entry:
50  %ext = zext <16 x i8> %a to <16 x i32>
51  %cmp = icmp sgt <16 x i8> %a, %b
52  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
53  ret <16 x i32> %sel
54}
55
; %a is zero-extended for the select but compared with a signed predicate
; (sgt) against a splat of 10. A signed compare on the i8 lanes is not the
; same as a compare on the zero-extended lanes for inputs with the top bit
; set, and the expected output keeps the compare narrow (cmgt.16b) and widens
; the mask separately with sshll.
56define <16 x i32> @same_zext_used_in_cmp_signed_pred_and_select(<16 x i8> %a) {
57; CHECK-LABEL: same_zext_used_in_cmp_signed_pred_and_select:
58; CHECK:       ; %bb.0: ; %entry
59; CHECK-NEXT:    movi.16b v1, #10
60; CHECK-NEXT:    ushll.8h v2, v0, #0
61; CHECK-NEXT:    ushll2.8h v3, v0, #0
62; CHECK-NEXT:    ushll.4s v4, v2, #0
63; CHECK-NEXT:    cmgt.16b v0, v0, v1
64; CHECK-NEXT:    ushll.4s v5, v3, #0
65; CHECK-NEXT:    ushll2.4s v1, v3, #0
66; CHECK-NEXT:    sshll.8h v3, v0, #0
67; CHECK-NEXT:    sshll2.8h v0, v0, #0
68; CHECK-NEXT:    ushll2.4s v2, v2, #0
69; CHECK-NEXT:    sshll.4s v6, v3, #0
70; CHECK-NEXT:    sshll.4s v7, v0, #0
71; CHECK-NEXT:    sshll2.4s v0, v0, #0
72; CHECK-NEXT:    sshll2.4s v16, v3, #0
73; CHECK-NEXT:    and.16b v3, v1, v0
74; CHECK-NEXT:    and.16b v1, v2, v16
75; CHECK-NEXT:    and.16b v2, v5, v7
76; CHECK-NEXT:    and.16b v0, v4, v6
77; CHECK-NEXT:    ret
78entry:
79  %ext = zext <16 x i8> %a to <16 x i32>
80  %cmp = icmp sgt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
81  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
82  ret <16 x i32> %sel
83}
84
; <8 x i8> -> <8 x i64> zext whose source is also compared (ugt, splat of 10)
; to form the select condition.
; The expected output widens %a with ushll down to 2d lanes and performs the
; compare on those widened lanes (cmhi.2d), so no separate mask extension is
; emitted. The splat of 10 is built through a GPR (mov w8 / dup.2d).
85define <8 x i64> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i64(<8 x i8> %a) {
86; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i64:
87; CHECK:       ; %bb.0:
88; CHECK-NEXT:    ushll.8h v0, v0, #0
89; CHECK-NEXT:    mov w8, #10
90; CHECK-NEXT:    ushll2.4s v2, v0, #0
91; CHECK-NEXT:    ushll.4s v0, v0, #0
92; CHECK-NEXT:    dup.2d v1, x8
93; CHECK-NEXT:    ushll2.2d v3, v2, #0
94; CHECK-NEXT:    ushll2.2d v4, v0, #0
95; CHECK-NEXT:    ushll.2d v0, v0, #0
96; CHECK-NEXT:    ushll.2d v2, v2, #0
97; CHECK-NEXT:    cmhi.2d v5, v0, v1
98; CHECK-NEXT:    cmhi.2d v6, v2, v1
99; CHECK-NEXT:    cmhi.2d v7, v3, v1
100; CHECK-NEXT:    cmhi.2d v1, v4, v1
101; CHECK-NEXT:    and.16b v3, v3, v7
102; CHECK-NEXT:    and.16b v1, v4, v1
103; CHECK-NEXT:    and.16b v2, v2, v6
104; CHECK-NEXT:    and.16b v0, v0, v5
105; CHECK-NEXT:    ret
106  %ext = zext <8 x i8> %a to <8 x i64>
107  %cmp = icmp ugt <8 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
108  %sel = select <8 x i1> %cmp, <8 x i64> %ext, <8 x i64> zeroinitializer
109  ret <8 x i64> %sel
110}
111
112
; <16 x i8> -> <16 x i32> zext whose source is also compared (ugt, splat of
; 10) to form the select condition.
; The expected output compares the already-widened lanes (cmhi.4s against
; movi.4s #10) and and's each widened vector with its own compare result.
113define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v16i32(<16 x i8> %a) {
114; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v16i32:
115; CHECK:       ; %bb.0:
116; CHECK-NEXT:    movi.4s v1, #10
117; CHECK-NEXT:    ushll2.8h v2, v0, #0
118; CHECK-NEXT:    ushll.8h v0, v0, #0
119; CHECK-NEXT:    ushll2.4s v3, v2, #0
120; CHECK-NEXT:    ushll2.4s v4, v0, #0
121; CHECK-NEXT:    ushll.4s v0, v0, #0
122; CHECK-NEXT:    ushll.4s v2, v2, #0
123; CHECK-NEXT:    cmhi.4s v5, v0, v1
124; CHECK-NEXT:    cmhi.4s v6, v2, v1
125; CHECK-NEXT:    cmhi.4s v7, v3, v1
126; CHECK-NEXT:    cmhi.4s v1, v4, v1
127; CHECK-NEXT:    and.16b v3, v3, v7
128; CHECK-NEXT:    and.16b v1, v4, v1
129; CHECK-NEXT:    and.16b v2, v2, v6
130; CHECK-NEXT:    and.16b v0, v0, v5
131; CHECK-NEXT:    ret
132  %ext = zext <16 x i8> %a to <16 x i32>
133  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
134  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
135  ret <16 x i32> %sel
136}
137
; <8 x i8> -> <8 x i32> variant of the zext + ugt-against-splat-of-10 + select
; pattern. The expected output widens in two ushll steps and compares on the
; 4s lanes (cmhi.4s).
138define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32(<8 x i8> %a) {
139; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32:
140; CHECK:       ; %bb.0:
141; CHECK-NEXT:    movi.4s v1, #10
142; CHECK-NEXT:    ushll.8h v0, v0, #0
143; CHECK-NEXT:    ushll2.4s v2, v0, #0
144; CHECK-NEXT:    ushll.4s v0, v0, #0
145; CHECK-NEXT:    cmhi.4s v3, v2, v1
146; CHECK-NEXT:    cmhi.4s v4, v0, v1
147; CHECK-NEXT:    and.16b v1, v2, v3
148; CHECK-NEXT:    and.16b v0, v0, v4
149; CHECK-NEXT:    ret
150  %ext = zext <8 x i8> %a to <8 x i32>
151  %cmp = icmp ugt <8 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
152  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
153  ret <8 x i32> %sel
154}
155
; Same result type as the <8 x i8> -> <8 x i32> case, but the source is
; <8 x i16>, so a single ushll/ushll2 step is enough before the cmhi.4s
; compare in the expected output.
156define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_2(<8 x i16> %a) {
157; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_2:
158; CHECK:       ; %bb.0:
159; CHECK-NEXT:    movi.4s v1, #10
160; CHECK-NEXT:    ushll2.4s v2, v0, #0
161; CHECK-NEXT:    ushll.4s v0, v0, #0
162; CHECK-NEXT:    cmhi.4s v3, v2, v1
163; CHECK-NEXT:    cmhi.4s v4, v0, v1
164; CHECK-NEXT:    and.16b v1, v2, v3
165; CHECK-NEXT:    and.16b v0, v0, v4
166; CHECK-NEXT:    ret
167  %ext = zext <8 x i16> %a to <8 x i32>
168  %cmp = icmp ugt <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
169  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
170  ret <8 x i32> %sel
171}
172
173
; Source element type is the non-power-of-two i15.
; In the expected output the bic.8h with #128, lsl #8 clears bit 15 of every
; 16-bit lane before the ushll widening, and the compare is then done on the
; 4s lanes (cmhi.4s).
174define <8 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) {
175; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15:
176; CHECK:       ; %bb.0:
177; CHECK-NEXT:    movi.4s v1, #10
178; CHECK-NEXT:    bic.8h v0, #128, lsl #8
179; CHECK-NEXT:    ushll2.4s v2, v0, #0
180; CHECK-NEXT:    ushll.4s v0, v0, #0
181; CHECK-NEXT:    cmhi.4s v3, v2, v1
182; CHECK-NEXT:    cmhi.4s v4, v0, v1
183; CHECK-NEXT:    and.16b v1, v2, v3
184; CHECK-NEXT:    and.16b v0, v0, v4
185; CHECK-NEXT:    ret
186  %ext = zext <8 x i15> %a to <8 x i32>
187  %cmp = icmp ugt <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>
188  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
189  ret <8 x i32> %sel
190}
191
; Seven-element (non-power-of-two) vector variant.
; In the expected output the compare stays on the 16-bit lanes (cmhi.8h) and
; the mask is sign-extended with sshll/sshll2; the seven result elements are
; then moved out lane by lane into w0-w6.
192define <7 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v7i32(<7 x i16> %a) {
193; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v7i32:
194; CHECK:       ; %bb.0:
195; CHECK-NEXT:    movi.8h v1, #10
196; CHECK-NEXT:    ushll2.4s v2, v0, #0
197; CHECK-NEXT:    cmhi.8h v1, v0, v1
198; CHECK-NEXT:    ushll.4s v0, v0, #0
199; CHECK-NEXT:    sshll.4s v3, v1, #0
200; CHECK-NEXT:    sshll2.4s v1, v1, #0
201; CHECK-NEXT:    and.16b v0, v0, v3
202; CHECK-NEXT:    and.16b v1, v2, v1
203; CHECK-NEXT:    mov.s w1, v0[1]
204; CHECK-NEXT:    mov.s w2, v0[2]
205; CHECK-NEXT:    mov.s w3, v0[3]
206; CHECK-NEXT:    mov.s w5, v1[1]
207; CHECK-NEXT:    mov.s w6, v1[2]
208; CHECK-NEXT:    fmov w0, s0
209; CHECK-NEXT:    fmov w4, s1
210; CHECK-NEXT:    ret
211  %ext = zext <7 x i16> %a to <7 x i32>
212  %cmp = icmp ugt <7 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
213  %sel = select <7 x i1> %cmp, <7 x i32> %ext, <7 x i32> zeroinitializer
214  ret <7 x i32> %sel
215}
216
; Three-element variant: <3 x i8> is zero-extended to <3 x i32>.
; NOTE(review): the function name ends in "v3i16" although the result type is
; <3 x i32>; the name is kept as-is because the label assertion depends on it.
; In the expected output the three elements arrive in w0-w2 and are inserted
; into 16-bit lanes, the compare runs at 4h width (cmhi.4h) against a
; constant-pool operand, and the widened result is masked with 0xff per lane.
217define <3 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v3i16(<3 x i8> %a) {
218; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v3i16:
219; CHECK:       ; %bb.0:
220; CHECK-NEXT:    fmov s0, w0
221; CHECK-NEXT:  Lloh0:
222; CHECK-NEXT:    adrp x8, lCPI9_0@PAGE
223; CHECK-NEXT:    mov.h v0[1], w1
224; CHECK-NEXT:  Lloh1:
225; CHECK-NEXT:    ldr d2, [x8, lCPI9_0@PAGEOFF]
226; CHECK-NEXT:    mov.h v0[2], w2
227; CHECK-NEXT:    fmov d1, d0
228; CHECK-NEXT:    bic.4h v1, #255, lsl #8
229; CHECK-NEXT:    cmhi.4h v1, v1, v2
230; CHECK-NEXT:    movi.2d v2, #0x0000ff000000ff
231; CHECK-NEXT:    and.8b v0, v0, v1
232; CHECK-NEXT:    ushll.4s v0, v0, #0
233; CHECK-NEXT:    and.16b v0, v0, v2
234; CHECK-NEXT:    ret
235; CHECK-NEXT:    .loh AdrpLdr Lloh0, Lloh1
236  %ext = zext <3 x i8> %a to <3 x i32>
237  %cmp = icmp ugt <3 x i8> %a, <i8 10, i8 10, i8 10>
238  %sel = select <3 x i1> %cmp, <3 x i32> %ext, <3 x i32> zeroinitializer
239  ret <3 x i32> %sel
240}
241
; <4 x i16> -> <4 x i32>: the result fits one 128-bit register, so the
; expected output is a single ushll.4s, one cmhi.4s on the widened lanes and
; one and.
242define <4 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v4i32(<4 x i16> %a) {
243; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v4i32:
244; CHECK:       ; %bb.0:
245; CHECK-NEXT:    movi.4s v1, #10
246; CHECK-NEXT:    ushll.4s v0, v0, #0
247; CHECK-NEXT:    cmhi.4s v1, v0, v1
248; CHECK-NEXT:    and.16b v0, v0, v1
249; CHECK-NEXT:    ret
250  %ext = zext <4 x i16> %a to <4 x i32>
251  %cmp = icmp ugt <4 x i16> %a, <i16 10, i16 10, i16 10, i16 10>
252  %sel = select <4 x i1> %cmp, <4 x i32> %ext, <4 x i32> zeroinitializer
253  ret <4 x i32> %sel
254}
255
; <2 x i16> -> <2 x i32>: in the expected output the zero-extension is an and
; with a 0x0000ffff-per-lane mask rather than a ushll, followed by a cmhi.2s
; on the masked value and a final and with the compare result.
256define <2 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_v2i32(<2 x i16> %a) {
257; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_v2i32:
258; CHECK:       ; %bb.0:
259; CHECK-NEXT:    movi d1, #0x00ffff0000ffff
260; CHECK-NEXT:    movi.2s v2, #10
261; CHECK-NEXT:    and.8b v0, v0, v1
262; CHECK-NEXT:    cmhi.2s v1, v0, v2
263; CHECK-NEXT:    and.8b v0, v0, v1
264; CHECK-NEXT:    ret
265  %ext = zext <2 x i16> %a to <2 x i32>
266  %cmp = icmp ugt <2 x i16> %a, <i16 10, i16 10>
267  %sel = select <2 x i1> %cmp, <2 x i32> %ext, <2 x i32> zeroinitializer
268  ret <2 x i32> %sel
269}
270
; Equality-predicate variant of the zext + compare-against-splat-of-10 +
; select pattern, <8 x i16> -> <8 x i32>. The expected output compares the
; widened lanes with cmeq.4s.
271define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32(<8 x i16> %a) {
272; CHECK-LABEL: same_zext_used_in_cmp_eq_and_select_v8i32:
273; CHECK:       ; %bb.0:
274; CHECK-NEXT:    movi.4s v1, #10
275; CHECK-NEXT:    ushll2.4s v2, v0, #0
276; CHECK-NEXT:    ushll.4s v0, v0, #0
277; CHECK-NEXT:    cmeq.4s v3, v2, v1
278; CHECK-NEXT:    cmeq.4s v4, v0, v1
279; CHECK-NEXT:    and.16b v1, v2, v3
280; CHECK-NEXT:    and.16b v0, v0, v4
281; CHECK-NEXT:    ret
282  %ext = zext <8 x i16> %a to <8 x i32>
283  %cmp = icmp eq <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
284  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
285  ret <8 x i32> %sel
286}
287
; Equality-predicate variant with the non-power-of-two source type i13.
; In the expected output the bic.8h with #224, lsl #8 clears the top three
; bits (0xe000) of every 16-bit lane before the ushll widening and the
; cmeq.4s compare.
288define <8 x i32> @same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) {
289; CHECK-LABEL: same_zext_used_in_cmp_eq_and_select_v8i32_from_v8i13:
290; CHECK:       ; %bb.0:
291; CHECK-NEXT:    movi.4s v1, #10
292; CHECK-NEXT:    bic.8h v0, #224, lsl #8
293; CHECK-NEXT:    ushll2.4s v2, v0, #0
294; CHECK-NEXT:    ushll.4s v0, v0, #0
295; CHECK-NEXT:    cmeq.4s v3, v2, v1
296; CHECK-NEXT:    cmeq.4s v4, v0, v1
297; CHECK-NEXT:    and.16b v1, v2, v3
298; CHECK-NEXT:    and.16b v0, v0, v4
299; CHECK-NEXT:    ret
300  %ext = zext <8 x i13> %a to <8 x i32>
301  %cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
302  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
303  ret <8 x i32> %sel
304}
305
; Inequality-predicate variant of the zext + compare-against-splat-of-10 +
; select pattern.
; NOTE(review): the name ends in "v8i32" although the types here are
; <16 x i8> -> <16 x i32>; the name is kept because the label assertion
; depends on it.
; The expected output computes equality on the widened lanes (cmeq.4s) and
; applies the inverted mask with bic.16b instead of and.
306define <16 x i32> @same_zext_used_in_cmp_ne_and_select_v8i32(<16 x i8> %a) {
307; CHECK-LABEL: same_zext_used_in_cmp_ne_and_select_v8i32:
308; CHECK:       ; %bb.0:
309; CHECK-NEXT:    movi.4s v1, #10
310; CHECK-NEXT:    ushll2.8h v2, v0, #0
311; CHECK-NEXT:    ushll.8h v0, v0, #0
312; CHECK-NEXT:    ushll2.4s v3, v2, #0
313; CHECK-NEXT:    ushll2.4s v4, v0, #0
314; CHECK-NEXT:    ushll.4s v0, v0, #0
315; CHECK-NEXT:    ushll.4s v2, v2, #0
316; CHECK-NEXT:    cmeq.4s v5, v0, v1
317; CHECK-NEXT:    cmeq.4s v6, v2, v1
318; CHECK-NEXT:    cmeq.4s v7, v3, v1
319; CHECK-NEXT:    cmeq.4s v1, v4, v1
320; CHECK-NEXT:    bic.16b v3, v3, v7
321; CHECK-NEXT:    bic.16b v1, v4, v1
322; CHECK-NEXT:    bic.16b v2, v2, v6
323; CHECK-NEXT:    bic.16b v0, v0, v5
324; CHECK-NEXT:    ret
325  %ext = zext <16 x i8> %a to <16 x i32>
326  %cmp = icmp ne <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
327  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
328  ret <16 x i32> %sel
329}
330
331; A variation of @same_zext_used_in_cmp_unsigned_pred_and_select_v16i32, with
332; multiple users of the compare.
333define <16 x i32> @same_zext_used_in_cmp_unsigned_pred_and_select_other_use(<16 x i8> %a, <16 x i64> %v, <16 x i64>* %ptr) {
334; CHECK-LABEL: same_zext_used_in_cmp_unsigned_pred_and_select_other_use:
335; CHECK:       ; %bb.0: ; %entry
336; CHECK-NEXT:    mov.16b v16, v2
337; CHECK-NEXT:    movi.16b v2, #10
338; CHECK-NEXT:    ushll.8h v18, v0, #0
339; CHECK-NEXT:    ushll2.8h v20, v0, #0
340; CHECK-NEXT:    mov.16b v17, v1
341; CHECK-NEXT:    ldr q1, [sp]
342; CHECK-NEXT:    cmhi.16b v0, v0, v2
343; CHECK-NEXT:    ushll.4s v19, v18, #0
344; CHECK-NEXT:    sshll2.8h v21, v0, #0
345; CHECK-NEXT:    sshll.8h v0, v0, #0
346; CHECK-NEXT:    sshll2.4s v22, v21, #0
347; CHECK-NEXT:    sshll.4s v21, v21, #0
348; CHECK-NEXT:    sshll2.2d v23, v22, #0
349; CHECK-NEXT:    sshll.2d v24, v22, #0
350; CHECK-NEXT:    sshll2.4s v25, v0, #0
351; CHECK-NEXT:    sshll2.2d v26, v21, #0
352; CHECK-NEXT:    sshll.2d v28, v21, #0
353; CHECK-NEXT:    sshll2.2d v27, v25, #0
354; CHECK-NEXT:    sshll.4s v0, v0, #0
355; CHECK-NEXT:    and.16b v1, v1, v23
356; CHECK-NEXT:    and.16b v7, v7, v24
357; CHECK-NEXT:    sshll.2d v29, v25, #0
358; CHECK-NEXT:    stp q7, q1, [x0, #96]
359; CHECK-NEXT:    and.16b v1, v6, v26
360; CHECK-NEXT:    and.16b v5, v5, v28
361; CHECK-NEXT:    ushll.4s v2, v20, #0
362; CHECK-NEXT:    stp q5, q1, [x0, #64]
363; CHECK-NEXT:    ushll2.4s v18, v18, #0
364; CHECK-NEXT:    ushll2.4s v20, v20, #0
365; CHECK-NEXT:    and.16b v1, v4, v27
366; CHECK-NEXT:    sshll2.2d v4, v0, #0
367; CHECK-NEXT:    sshll.2d v5, v0, #0
368; CHECK-NEXT:    and.16b v3, v3, v29
369; CHECK-NEXT:    stp q3, q1, [x0, #32]
370; CHECK-NEXT:    and.16b v3, v20, v22
371; CHECK-NEXT:    and.16b v1, v18, v25
372; CHECK-NEXT:    and.16b v2, v2, v21
373; CHECK-NEXT:    and.16b v0, v19, v0
374; CHECK-NEXT:    and.16b v4, v16, v4
375; CHECK-NEXT:    and.16b v5, v17, v5
376; CHECK-NEXT:    stp q5, q4, [x0]
377; CHECK-NEXT:    ret
378entry:
379  %ext = zext <16 x i8> %a to <16 x i32>
380  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
  ; First user of %cmp: guards the zero-extended input, which is the return value.
381  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
  ; Second user of %cmp: guards the unrelated <16 x i64> value %v, which is
  ; stored through %ptr. In the expected output the compare is done once on the
  ; byte lanes (cmhi.16b) and its mask is sign-extended to both 4s and 2d lanes.
382  %sel.2 = select <16 x i1> %cmp, <16 x i64> %v, <16 x i64> zeroinitializer
383  store <16 x i64> %sel.2, <16 x i64>* %ptr
384  ret <16 x i32> %sel
385}
386
; Sign-extension counterpart of the zext tests: %a is sign-extended for the
; select and compared with a signed predicate (sgt) against a splat of 10.
; The expected output widens with sshll and compares on the widened lanes
; (cmgt.4s).
387define <16 x i32> @same_sext_used_in_cmp_signed_pred_and_select_v16i32(<16 x i8> %a) {
388; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select_v16i32:
389; CHECK:       ; %bb.0: ; %entry
390; CHECK-NEXT:    movi.4s v1, #10
391; CHECK-NEXT:    sshll2.8h v2, v0, #0
392; CHECK-NEXT:    sshll.8h v0, v0, #0
393; CHECK-NEXT:    sshll2.4s v3, v2, #0
394; CHECK-NEXT:    sshll2.4s v4, v0, #0
395; CHECK-NEXT:    sshll.4s v0, v0, #0
396; CHECK-NEXT:    sshll.4s v2, v2, #0
397; CHECK-NEXT:    cmgt.4s v5, v0, v1
398; CHECK-NEXT:    cmgt.4s v6, v2, v1
399; CHECK-NEXT:    cmgt.4s v7, v3, v1
400; CHECK-NEXT:    cmgt.4s v1, v4, v1
401; CHECK-NEXT:    and.16b v3, v3, v7
402; CHECK-NEXT:    and.16b v1, v4, v1
403; CHECK-NEXT:    and.16b v2, v2, v6
404; CHECK-NEXT:    and.16b v0, v0, v5
405; CHECK-NEXT:    ret
406entry:
407  %ext = sext <16 x i8> %a to <16 x i32>
408  %cmp = icmp sgt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
409  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
410  ret <16 x i32> %sel
411}
412
; Sign-extension with an equality compare against a splat of 10,
; <8 x i16> -> <8 x i32>. The expected output widens with sshll and compares
; on the widened lanes (cmeq.4s).
413define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32(<8 x i16> %a) {
414; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32:
415; CHECK:       ; %bb.0:
416; CHECK-NEXT:    movi.4s v1, #10
417; CHECK-NEXT:    sshll2.4s v2, v0, #0
418; CHECK-NEXT:    sshll.4s v0, v0, #0
419; CHECK-NEXT:    cmeq.4s v3, v2, v1
420; CHECK-NEXT:    cmeq.4s v4, v0, v1
421; CHECK-NEXT:    and.16b v1, v2, v3
422; CHECK-NEXT:    and.16b v0, v0, v4
423; CHECK-NEXT:    ret
424  %ext = sext <8 x i16> %a to <8 x i32>
425  %cmp = icmp eq <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
426  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
427  ret <8 x i32> %sel
428}
429
; Sign-extension from the non-power-of-two type i13 with an equality compare.
; In the expected output the 13-bit value is sign-extended inside each 32-bit
; lane with a shl #19 / sshr #19 pair (19 = 32 - 13) before the cmeq.4s
; compare.
430define <8 x i32> @same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13(<8 x i13> %a) {
431; CHECK-LABEL: same_sext_used_in_cmp_eq_and_select_v8i32_from_v8i13:
432; CHECK:       ; %bb.0:
433; CHECK-NEXT:    ushll2.4s v2, v0, #0
434; CHECK-NEXT:    ushll.4s v0, v0, #0
435; CHECK-NEXT:    movi.4s v1, #10
436; CHECK-NEXT:    shl.4s v2, v2, #19
437; CHECK-NEXT:    shl.4s v0, v0, #19
438; CHECK-NEXT:    sshr.4s v2, v2, #19
439; CHECK-NEXT:    sshr.4s v0, v0, #19
440; CHECK-NEXT:    cmeq.4s v3, v2, v1
441; CHECK-NEXT:    cmeq.4s v4, v0, v1
442; CHECK-NEXT:    and.16b v1, v2, v3
443; CHECK-NEXT:    and.16b v0, v0, v4
444; CHECK-NEXT:    ret
445  %ext = sext <8 x i13> %a to <8 x i32>
446  %cmp = icmp eq <8 x i13> %a, <i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10, i13 10>
447  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
448  ret <8 x i32> %sel
449}
450
; Sign-extension with an inequality compare against a splat of 10.
; NOTE(review): the name ends in "v8i32" although the types here are
; <16 x i8> -> <16 x i32>; the name is kept because the label assertion
; depends on it.
; The expected output computes equality on the widened lanes (cmeq.4s) and
; applies the inverted mask with bic.16b.
451define <16 x i32> @same_sext_used_in_cmp_ne_and_select_v8i32(<16 x i8> %a) {
452; CHECK-LABEL: same_sext_used_in_cmp_ne_and_select_v8i32:
453; CHECK:       ; %bb.0:
454; CHECK-NEXT:    movi.4s v1, #10
455; CHECK-NEXT:    sshll2.8h v2, v0, #0
456; CHECK-NEXT:    sshll.8h v0, v0, #0
457; CHECK-NEXT:    sshll2.4s v3, v2, #0
458; CHECK-NEXT:    sshll2.4s v4, v0, #0
459; CHECK-NEXT:    sshll.4s v0, v0, #0
460; CHECK-NEXT:    sshll.4s v2, v2, #0
461; CHECK-NEXT:    cmeq.4s v5, v0, v1
462; CHECK-NEXT:    cmeq.4s v6, v2, v1
463; CHECK-NEXT:    cmeq.4s v7, v3, v1
464; CHECK-NEXT:    cmeq.4s v1, v4, v1
465; CHECK-NEXT:    bic.16b v3, v3, v7
466; CHECK-NEXT:    bic.16b v1, v4, v1
467; CHECK-NEXT:    bic.16b v2, v2, v6
468; CHECK-NEXT:    bic.16b v0, v0, v5
469; CHECK-NEXT:    ret
470  %ext = sext <16 x i8> %a to <16 x i32>
471  %cmp = icmp ne <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
472  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
473  ret <16 x i32> %sel
474}
475
; <8 x i16> -> <8 x i32> sign-extension with a signed compare (sgt) against a
; splat of 10. The expected output widens with sshll and compares on the
; widened lanes (cmgt.4s).
476define <8 x i32> @same_sext_used_in_cmp_signed_pred_and_select_v8i32(<8 x i16> %a) {
477; CHECK-LABEL: same_sext_used_in_cmp_signed_pred_and_select_v8i32:
478; CHECK:       ; %bb.0: ; %entry
479; CHECK-NEXT:    movi.4s v1, #10
480; CHECK-NEXT:    sshll2.4s v2, v0, #0
481; CHECK-NEXT:    sshll.4s v0, v0, #0
482; CHECK-NEXT:    cmgt.4s v3, v2, v1
483; CHECK-NEXT:    cmgt.4s v4, v0, v1
484; CHECK-NEXT:    and.16b v1, v2, v3
485; CHECK-NEXT:    and.16b v0, v0, v4
486; CHECK-NEXT:    ret
487entry:
488  %ext = sext <8 x i16> %a to <8 x i32>
489  %cmp = icmp sgt <8 x i16> %a, <i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10, i16 10>
490  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
491  ret <8 x i32> %sel
492}
493
; Sign-extension from the non-power-of-two type i15.
; NOTE(review): the name says "unsigned_pred" but the predicate used below is
; the signed sge; the name is kept because the label assertion depends on it.
; In the expected output the 15-bit value is sign-extended inside each 32-bit
; lane with shl #17 / sshr #17 (17 = 32 - 15) and compared with cmge.4s.
494define <8 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15(<8 x i15> %a) {
495; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select_v8i32_from_v8i15:
496; CHECK:       ; %bb.0:
497; CHECK-NEXT:    ushll2.4s v2, v0, #0
498; CHECK-NEXT:    ushll.4s v0, v0, #0
499; CHECK-NEXT:    movi.4s v1, #10
500; CHECK-NEXT:    shl.4s v2, v2, #17
501; CHECK-NEXT:    shl.4s v0, v0, #17
502; CHECK-NEXT:    sshr.4s v2, v2, #17
503; CHECK-NEXT:    sshr.4s v0, v0, #17
504; CHECK-NEXT:    cmge.4s v3, v2, v1
505; CHECK-NEXT:    cmge.4s v4, v0, v1
506; CHECK-NEXT:    and.16b v1, v2, v3
507; CHECK-NEXT:    and.16b v0, v0, v4
508; CHECK-NEXT:    ret
509  %ext = sext <8 x i15> %a to <8 x i32>
510  %cmp = icmp sge <8 x i15> %a, <i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10, i15 10>
511  %sel = select <8 x i1> %cmp, <8 x i32> %ext, <8 x i32> zeroinitializer
512  ret <8 x i32> %sel
513}
514
; %a is sign-extended for the select but compared with an unsigned predicate
; (ugt) against a splat of 10, so the extension kind and the predicate
; signedness disagree.
; The expected output keeps the compare on the byte lanes (cmhi.16b), applies
; the mask at 8h width (and.8b on 64-bit halves split off with ext) and only
; then performs the final sshll.4s widening.
515define <16 x i32> @same_sext_used_in_cmp_unsigned_pred_and_select(<16 x i8> %a) {
516; CHECK-LABEL: same_sext_used_in_cmp_unsigned_pred_and_select:
517; CHECK:       ; %bb.0: ; %entry
518; CHECK-NEXT:    movi.16b v1, #10
519; CHECK-NEXT:    sshll.8h v3, v0, #0
520; CHECK-NEXT:    sshll2.8h v2, v0, #0
521; CHECK-NEXT:    cmhi.16b v0, v0, v1
522; CHECK-NEXT:    ext.16b v1, v3, v3, #8
523; CHECK-NEXT:    sshll.8h v5, v0, #0
524; CHECK-NEXT:    sshll2.8h v0, v0, #0
525; CHECK-NEXT:    ext.16b v4, v2, v2, #8
526; CHECK-NEXT:    ext.16b v6, v5, v5, #8
527; CHECK-NEXT:    ext.16b v7, v0, v0, #8
528; CHECK-NEXT:    and.8b v0, v2, v0
529; CHECK-NEXT:    sshll.4s v2, v0, #0
530; CHECK-NEXT:    and.8b v0, v3, v5
531; CHECK-NEXT:    and.8b v1, v1, v6
532; CHECK-NEXT:    and.8b v3, v4, v7
533; CHECK-NEXT:    sshll.4s v0, v0, #0
534; CHECK-NEXT:    sshll.4s v1, v1, #0
535; CHECK-NEXT:    sshll.4s v3, v3, #0
536; CHECK-NEXT:    ret
537entry:
538  %ext = sext <16 x i8> %a to <16 x i32>
539  %cmp = icmp ugt <16 x i8> %a, <i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10, i8 10>
540  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
541  ret <16 x i32> %sel
542}
543
; %a is zero-extended for the select and compared with "sgt -1", which is true
; exactly for lanes whose sign bit is clear.
; NOTE(review): the name suggests this signed compare could be rewritten as an
; unsigned one; the expected output below still performs a signed compare on
; the byte lanes (cmgt.16b against an all-ones vector) and widens the mask
; separately with sshll.
544define <16 x i32> @same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred(<16 x i8> %a) {
545; CHECK-LABEL: same_zext_used_in_cmp_signed_pred_and_select_can_convert_to_unsigned_pred:
546; CHECK:       ; %bb.0: ; %entry
547; CHECK-NEXT:    movi.2d v1, #0xffffffffffffffff
548; CHECK-NEXT:    ushll.8h v2, v0, #0
549; CHECK-NEXT:    ushll2.8h v3, v0, #0
550; CHECK-NEXT:    ushll.4s v4, v2, #0
551; CHECK-NEXT:    cmgt.16b v0, v0, v1
552; CHECK-NEXT:    ushll.4s v5, v3, #0
553; CHECK-NEXT:    ushll2.4s v1, v3, #0
554; CHECK-NEXT:    sshll.8h v3, v0, #0
555; CHECK-NEXT:    sshll2.8h v0, v0, #0
556; CHECK-NEXT:    ushll2.4s v2, v2, #0
557; CHECK-NEXT:    sshll.4s v6, v3, #0
558; CHECK-NEXT:    sshll.4s v7, v0, #0
559; CHECK-NEXT:    sshll2.4s v0, v0, #0
560; CHECK-NEXT:    sshll2.4s v16, v3, #0
561; CHECK-NEXT:    and.16b v3, v1, v0
562; CHECK-NEXT:    and.16b v1, v2, v16
563; CHECK-NEXT:    and.16b v2, v5, v7
564; CHECK-NEXT:    and.16b v0, v4, v6
565; CHECK-NEXT:    ret
566entry:
567  %ext = zext <16 x i8> %a to <16 x i32>
568  %cmp = icmp sgt <16 x i8> %a,  <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
569  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
570  ret <16 x i32> %sel
571}
572
; Loop version of the zext + "sgt -1" + select pattern. Each iteration loads
; 16 bytes from %src at byte offset %iv, zero-extends them to <16 x i32>,
; zeroes the lanes whose source byte fails the compare, and stores the result
; to %dst at i32 index %iv. %iv advances by 16 and the loop exits when it
; reaches 128, i.e. after 8 iterations.
; The expected output hoists the all-ones compare operand out of the loop and
; does the compare on the byte lanes (cmgt.16b) inside it.
573define void @extension_in_loop_v16i8_to_v16i32(i8* %src, i32* %dst) {
574; CHECK-LABEL: extension_in_loop_v16i8_to_v16i32:
575; CHECK:       ; %bb.0: ; %entry
576; CHECK-NEXT:    movi.2d v0, #0xffffffffffffffff
577; CHECK-NEXT:    mov x8, xzr
578; CHECK-NEXT:  LBB24_1: ; %loop
579; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
580; CHECK-NEXT:    ldr q1, [x0, x8]
581; CHECK-NEXT:    add x8, x8, #16
582; CHECK-NEXT:    cmp x8, #128
583; CHECK-NEXT:    cmgt.16b v2, v1, v0
584; CHECK-NEXT:    ushll2.8h v3, v1, #0
585; CHECK-NEXT:    sshll2.8h v4, v2, #0
586; CHECK-NEXT:    ushll2.4s v5, v3, #0
587; CHECK-NEXT:    ushll.4s v3, v3, #0
588; CHECK-NEXT:    sshll2.4s v6, v4, #0
589; CHECK-NEXT:    sshll.4s v4, v4, #0
590; CHECK-NEXT:    ushll.8h v1, v1, #0
591; CHECK-NEXT:    sshll.8h v2, v2, #0
592; CHECK-NEXT:    and.16b v5, v5, v6
593; CHECK-NEXT:    and.16b v3, v3, v4
594; CHECK-NEXT:    stp q3, q5, [x1, #32]
595; CHECK-NEXT:    sshll2.4s v4, v2, #0
596; CHECK-NEXT:    sshll.4s v2, v2, #0
597; CHECK-NEXT:    ushll2.4s v3, v1, #0
598; CHECK-NEXT:    ushll.4s v1, v1, #0
599; CHECK-NEXT:    and.16b v3, v3, v4
600; CHECK-NEXT:    and.16b v1, v1, v2
601; CHECK-NEXT:    stp q1, q3, [x1], #64
602; CHECK-NEXT:    b.ne LBB24_1
603; CHECK-NEXT:  ; %bb.2: ; %exit
604; CHECK-NEXT:    ret
605entry:
606  br label %loop
607
608loop:
609  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
610  %src.gep = getelementptr i8, i8* %src, i64 %iv
611  %src.gep.cast = bitcast i8* %src.gep to <16 x i8>*
612  %load = load <16 x i8>, <16 x i8>* %src.gep.cast
613  %cmp = icmp sgt <16 x i8> %load,  <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
614  %ext = zext <16 x i8> %load to <16 x i32>
615  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
616  %dst.gep = getelementptr i32, i32* %dst, i64 %iv
617  %dst.gep.cast = bitcast i32* %dst.gep to <16 x i32>*
618  store <16 x i32> %sel, <16 x i32>* %dst.gep.cast
619  %iv.next = add nuw i64 %iv, 16
620  %ec = icmp eq i64 %iv.next, 128
621  br i1 %ec, label %exit, label %loop
622
623exit:
624  ret void
625}
626
; Same loop shape as the plain zext loop test, but the widening is written as
; a shufflevector instead of a zext: the loaded bytes are interleaved with
; bytes from a zero vector and the <64 x i8> result is bitcast to <16 x i32>.
; The expected output implements the shuffle with four tbl.16b lookups whose
; index vectors are loaded from the constant pool before the loop, and does
; the compare on the byte lanes (cmgt.16b).
627define void @extension_in_loop_as_shuffle_v16i8_to_v16i32(i8* %src, i32* %dst) {
628; CHECK-LABEL: extension_in_loop_as_shuffle_v16i8_to_v16i32:
629; CHECK:       ; %bb.0: ; %entry
630; CHECK-NEXT:  Lloh2:
631; CHECK-NEXT:    adrp x9, lCPI25_0@PAGE
632; CHECK-NEXT:  Lloh3:
633; CHECK-NEXT:    adrp x10, lCPI25_1@PAGE
634; CHECK-NEXT:  Lloh4:
635; CHECK-NEXT:    adrp x11, lCPI25_2@PAGE
636; CHECK-NEXT:  Lloh5:
637; CHECK-NEXT:    adrp x12, lCPI25_3@PAGE
638; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
639; CHECK-NEXT:    mov x8, xzr
640; CHECK-NEXT:  Lloh6:
641; CHECK-NEXT:    ldr q0, [x9, lCPI25_0@PAGEOFF]
642; CHECK-NEXT:  Lloh7:
643; CHECK-NEXT:    ldr q1, [x10, lCPI25_1@PAGEOFF]
644; CHECK-NEXT:  Lloh8:
645; CHECK-NEXT:    ldr q3, [x11, lCPI25_2@PAGEOFF]
646; CHECK-NEXT:  Lloh9:
647; CHECK-NEXT:    ldr q4, [x12, lCPI25_3@PAGEOFF]
648; CHECK-NEXT:  LBB25_1: ; %loop
649; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
650; CHECK-NEXT:    ldr q5, [x0, x8]
651; CHECK-NEXT:    add x8, x8, #16
652; CHECK-NEXT:    cmp x8, #128
653; CHECK-NEXT:    cmgt.16b v6, v5, v2
654; CHECK-NEXT:    tbl.16b v7, { v5 }, v0
655; CHECK-NEXT:    tbl.16b v16, { v5 }, v1
656; CHECK-NEXT:    sshll2.8h v18, v6, #0
657; CHECK-NEXT:    tbl.16b v17, { v5 }, v3
658; CHECK-NEXT:    sshll2.4s v19, v18, #0
659; CHECK-NEXT:    sshll.4s v18, v18, #0
660; CHECK-NEXT:    tbl.16b v5, { v5 }, v4
661; CHECK-NEXT:    sshll.8h v6, v6, #0
662; CHECK-NEXT:    and.16b v7, v7, v19
663; CHECK-NEXT:    and.16b v16, v16, v18
664; CHECK-NEXT:    stp q16, q7, [x1, #32]
665; CHECK-NEXT:    sshll2.4s v7, v6, #0
666; CHECK-NEXT:    sshll.4s v6, v6, #0
667; CHECK-NEXT:    and.16b v7, v17, v7
668; CHECK-NEXT:    and.16b v5, v5, v6
669; CHECK-NEXT:    stp q5, q7, [x1], #64
670; CHECK-NEXT:    b.ne LBB25_1
671; CHECK-NEXT:  ; %bb.2: ; %exit
672; CHECK-NEXT:    ret
673; CHECK-NEXT:    .loh AdrpLdr Lloh5, Lloh9
674; CHECK-NEXT:    .loh AdrpLdr Lloh4, Lloh8
675; CHECK-NEXT:    .loh AdrpLdr Lloh3, Lloh7
676; CHECK-NEXT:    .loh AdrpLdr Lloh2, Lloh6
677entry:
678  br label %loop
679
680loop:
681  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
682  %src.gep = getelementptr i8, i8* %src, i64 %iv
683  %src.gep.cast = bitcast i8* %src.gep to <16 x i8>*
684  %load = load <16 x i8>, <16 x i8>* %src.gep.cast
685  %cmp = icmp sgt <16 x i8> %load,  <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; Mask index 16 selects element 0 of the zero vector. Every group of four
  ; result bytes is three zero bytes followed by source byte 0, 1, ..., 15.
686  %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32> <i32 16, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 5, i32 16, i32 16, i32 16, i32 6, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 9, i32 16, i32 16, i32 16, i32 10, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 15>
687  %ext = bitcast <64 x i8> %ext.shuf to <16 x i32>
688  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
689  %dst.gep = getelementptr i32, i32* %dst, i64 %iv
690  %dst.gep.cast = bitcast i32* %dst.gep to <16 x i32>*
691  store <16 x i32> %sel, <16 x i32>* %dst.gep.cast
692  %iv.next = add nuw i64 %iv, 16
693  %ec = icmp eq i64 %iv.next, 128
694  br i1 %ec, label %exit, label %loop
695
696exit:
697  ret void
698}
699
; Negative counterpart of the shuffle-based extension loop: the shuffle mask
; differs in its very first element, which picks source byte 1 instead of a
; zero byte, so the first group of four result bytes holds two source bytes
; and the shuffle is no longer a plain widening of each source byte.
; The expected output again uses four tbl.16b lookups with constant-pool index
; vectors and a byte-lane compare (cmgt.16b).
700define void @shuffle_in_loop_is_no_extend_v16i8_to_v16i32(i8* %src, i32* %dst) {
701; CHECK-LABEL: shuffle_in_loop_is_no_extend_v16i8_to_v16i32:
702; CHECK:       ; %bb.0: ; %entry
703; CHECK-NEXT:  Lloh10:
704; CHECK-NEXT:    adrp x9, lCPI26_0@PAGE
705; CHECK-NEXT:  Lloh11:
706; CHECK-NEXT:    adrp x10, lCPI26_1@PAGE
707; CHECK-NEXT:  Lloh12:
708; CHECK-NEXT:    adrp x11, lCPI26_2@PAGE
709; CHECK-NEXT:  Lloh13:
710; CHECK-NEXT:    adrp x12, lCPI26_3@PAGE
711; CHECK-NEXT:    movi.2d v2, #0xffffffffffffffff
712; CHECK-NEXT:    mov x8, xzr
713; CHECK-NEXT:  Lloh14:
714; CHECK-NEXT:    ldr q0, [x9, lCPI26_0@PAGEOFF]
715; CHECK-NEXT:  Lloh15:
716; CHECK-NEXT:    ldr q1, [x10, lCPI26_1@PAGEOFF]
717; CHECK-NEXT:  Lloh16:
718; CHECK-NEXT:    ldr q3, [x11, lCPI26_2@PAGEOFF]
719; CHECK-NEXT:  Lloh17:
720; CHECK-NEXT:    ldr q4, [x12, lCPI26_3@PAGEOFF]
721; CHECK-NEXT:  LBB26_1: ; %loop
722; CHECK-NEXT:    ; =>This Inner Loop Header: Depth=1
723; CHECK-NEXT:    ldr q5, [x0, x8]
724; CHECK-NEXT:    add x8, x8, #16
725; CHECK-NEXT:    cmp x8, #128
726; CHECK-NEXT:    cmgt.16b v6, v5, v2
727; CHECK-NEXT:    tbl.16b v7, { v5 }, v0
728; CHECK-NEXT:    tbl.16b v16, { v5 }, v1
729; CHECK-NEXT:    sshll2.8h v18, v6, #0
730; CHECK-NEXT:    tbl.16b v17, { v5 }, v3
731; CHECK-NEXT:    sshll2.4s v19, v18, #0
732; CHECK-NEXT:    sshll.4s v18, v18, #0
733; CHECK-NEXT:    tbl.16b v5, { v5 }, v4
734; CHECK-NEXT:    sshll.8h v6, v6, #0
735; CHECK-NEXT:    and.16b v7, v7, v19
736; CHECK-NEXT:    and.16b v16, v16, v18
737; CHECK-NEXT:    stp q16, q7, [x1, #32]
738; CHECK-NEXT:    sshll2.4s v7, v6, #0
739; CHECK-NEXT:    sshll.4s v6, v6, #0
740; CHECK-NEXT:    and.16b v7, v17, v7
741; CHECK-NEXT:    and.16b v5, v5, v6
742; CHECK-NEXT:    stp q5, q7, [x1], #64
743; CHECK-NEXT:    b.ne LBB26_1
744; CHECK-NEXT:  ; %bb.2: ; %exit
745; CHECK-NEXT:    ret
746; CHECK-NEXT:    .loh AdrpLdr Lloh13, Lloh17
747; CHECK-NEXT:    .loh AdrpLdr Lloh12, Lloh16
748; CHECK-NEXT:    .loh AdrpLdr Lloh11, Lloh15
749; CHECK-NEXT:    .loh AdrpLdr Lloh10, Lloh14
750entry:
751  br label %loop
752
753loop:
754  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
755  %src.gep = getelementptr i8, i8* %src, i64 %iv
756  %src.gep.cast = bitcast i8* %src.gep to <16 x i8>*
757  %load = load <16 x i8>, <16 x i8>* %src.gep.cast
758  %cmp = icmp sgt <16 x i8> %load,  <i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>
  ; The leading mask element is 1 (a source byte), not 16 (a zero byte); all
  ; remaining groups are three zero bytes followed by one source byte.
759  %ext.shuf = shufflevector <16 x i8> %load, <16 x i8> zeroinitializer, <64 x i32> <i32 1, i32 16, i32 16, i32 0, i32 16, i32 16, i32 16, i32 1, i32 16, i32 16, i32 16, i32 2, i32 16, i32 16, i32 16, i32 3, i32 16, i32 16, i32 16, i32 4, i32 16, i32 16, i32 16, i32 5, i32 16, i32 16, i32 16, i32 6, i32 16, i32 16, i32 16, i32 7, i32 16, i32 16, i32 16, i32 8, i32 16, i32 16, i32 16, i32 9, i32 16, i32 16, i32 16, i32 10, i32 16, i32 16, i32 16, i32 11, i32 16, i32 16, i32 16, i32 12, i32 16, i32 16, i32 16, i32 13, i32 16, i32 16, i32 16, i32 14, i32 16, i32 16, i32 16, i32 15>
760  %ext = bitcast <64 x i8> %ext.shuf to <16 x i32>
761  %sel = select <16 x i1> %cmp, <16 x i32> %ext, <16 x i32> zeroinitializer
762  %dst.gep = getelementptr i32, i32* %dst, i64 %iv
763  %dst.gep.cast = bitcast i32* %dst.gep to <16 x i32>*
764  store <16 x i32> %sel, <16 x i32>* %dst.gep.cast
765  %iv.next = add nuw i64 %iv, 16
766  %ec = icmp eq i64 %iv.next, 128
767  br i1 %ec, label %exit, label %loop
768
769exit:
770  ret void
771}
772