1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \
3; RUN:     -mtriple=powerpc64le-unknown-linux-gnu \
4; RUN:     < %s | FileCheck %s --check-prefixes=P9LE
5
6; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \
7; RUN:     -mtriple=powerpc64-unknown-linux-gnu \
8; RUN:     < %s | FileCheck %s --check-prefixes=P9BE
9
10; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \
11; RUN:     -mtriple=powerpc64-ibm-aix-xcoff -vec-extabi \
12; RUN:     < %s | FileCheck %s --check-prefixes=P9BE-AIX
13
14; RUN: llc -mcpu=pwr9 -O3 -verify-machineinstrs -ppc-vsr-nums-as-vr \
15; RUN:     -mtriple=powerpc-ibm-aix-xcoff -vec-extabi \
16; RUN:     < %s | FileCheck %s --check-prefixes=P9BE-AIX32
17
; test64 -- two overlapping <4 x i16> loads (8 bytes each, align 1) feeding a
; small <4 x i32> arithmetic/shuffle chain whose result is stored through an
; undef pointer.  Presumably the "64" in the name refers to the 64-bit width
; of each vector load -- confirm against the originating commit.
;
; What the expectations below show: the three 64-bit configurations (Linux LE,
; Linux BE, 64-bit AIX) perform each load with a single scalar doubleword load
; (lxsdx / lxsd), while the 32-bit AIX configuration loads words with
; lwzux / lwz, spills them with stw and reloads them with lxv.
;
; The expectation lines are autogenerated (see the note at the top of the
; file); regenerate them with the update script rather than editing by hand.
;
; NOTE(review): every line visible in this file starts with a decimal number
; fused to its content (for example the number directly in front of "define"
; below).  This looks like an extraction artifact and would not parse as IR --
; confirm against the upstream copy of the test before running it.
18define void @test64(i8* nocapture readonly %pix2, i32 signext %i_pix2) {
19; P9LE-LABEL: test64:
20; P9LE:       # %bb.0: # %entry
21; P9LE-NEXT:    add 5, 3, 4
22; P9LE-NEXT:    lxsdx 2, 3, 4
23; P9LE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
24; P9LE-NEXT:    xxlxor 4, 4, 4
25; P9LE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
26; P9LE-NEXT:    lxv 3, 0(3)
27; P9LE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
28; P9LE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
29; P9LE-NEXT:    vperm 2, 2, 4, 3
30; P9LE-NEXT:    lxsd 3, 4(5)
31; P9LE-NEXT:    lxv 4, 0(3)
32; P9LE-NEXT:    vperm 3, 3, 3, 4
33; P9LE-NEXT:    vspltisw 4, 8
34; P9LE-NEXT:    vnegw 3, 3
35; P9LE-NEXT:    vadduwm 4, 4, 4
36; P9LE-NEXT:    vslw 3, 3, 4
37; P9LE-NEXT:    vsubuwm 2, 3, 2
38; P9LE-NEXT:    xxswapd 0, 2
39; P9LE-NEXT:    stxv 0, 0(3)
40; P9LE-NEXT:    blr
41;
42; P9BE-LABEL: test64:
43; P9BE:       # %bb.0: # %entry
44; P9BE-NEXT:    add 5, 3, 4
45; P9BE-NEXT:    lxsdx 2, 3, 4
46; P9BE-NEXT:    addis 3, 2, .LCPI0_0@toc@ha
47; P9BE-NEXT:    xxlxor 4, 4, 4
48; P9BE-NEXT:    addi 3, 3, .LCPI0_0@toc@l
49; P9BE-NEXT:    lxv 3, 0(3)
50; P9BE-NEXT:    addis 3, 2, .LCPI0_1@toc@ha
51; P9BE-NEXT:    addi 3, 3, .LCPI0_1@toc@l
52; P9BE-NEXT:    vperm 2, 4, 2, 3
53; P9BE-NEXT:    lxsd 3, 4(5)
54; P9BE-NEXT:    lxv 4, 0(3)
55; P9BE-NEXT:    vperm 3, 3, 3, 4
56; P9BE-NEXT:    vspltisw 4, 8
57; P9BE-NEXT:    vnegw 3, 3
58; P9BE-NEXT:    vadduwm 4, 4, 4
59; P9BE-NEXT:    vslw 3, 3, 4
60; P9BE-NEXT:    vsubuwm 2, 3, 2
61; P9BE-NEXT:    xxswapd 0, 2
62; P9BE-NEXT:    stxv 0, 0(3)
63; P9BE-NEXT:    blr
64;
65; P9BE-AIX-LABEL: test64:
66; P9BE-AIX:       # %bb.0: # %entry
67; P9BE-AIX-NEXT:    add 5, 3, 4
68; P9BE-AIX-NEXT:    lxsdx 2, 3, 4
69; P9BE-AIX-NEXT:    ld 3, L..C0(2) # %const.0
70; P9BE-AIX-NEXT:    xxlxor 4, 4, 4
71; P9BE-AIX-NEXT:    lxv 3, 0(3)
72; P9BE-AIX-NEXT:    ld 3, L..C1(2) # %const.1
73; P9BE-AIX-NEXT:    vperm 2, 4, 2, 3
74; P9BE-AIX-NEXT:    lxsd 3, 4(5)
75; P9BE-AIX-NEXT:    lxv 4, 0(3)
76; P9BE-AIX-NEXT:    vperm 3, 3, 3, 4
77; P9BE-AIX-NEXT:    vspltisw 4, 8
78; P9BE-AIX-NEXT:    vnegw 3, 3
79; P9BE-AIX-NEXT:    vadduwm 4, 4, 4
80; P9BE-AIX-NEXT:    vslw 3, 3, 4
81; P9BE-AIX-NEXT:    vsubuwm 2, 3, 2
82; P9BE-AIX-NEXT:    xxswapd 0, 2
83; P9BE-AIX-NEXT:    stxv 0, 0(3)
84; P9BE-AIX-NEXT:    blr
85;
86; P9BE-AIX32-LABEL: test64:
87; P9BE-AIX32:       # %bb.0: # %entry
88; P9BE-AIX32-NEXT:    lwzux 4, 3, 4
89; P9BE-AIX32-NEXT:    xxlxor 4, 4, 4
90; P9BE-AIX32-NEXT:    stw 4, -48(1)
91; P9BE-AIX32-NEXT:    lwz 4, 4(3)
92; P9BE-AIX32-NEXT:    lxv 0, -48(1)
93; P9BE-AIX32-NEXT:    stw 4, -32(1)
94; P9BE-AIX32-NEXT:    lwz 4, L..C0(2) # %const.0
95; P9BE-AIX32-NEXT:    lwz 3, 8(3)
96; P9BE-AIX32-NEXT:    lxv 1, -32(1)
97; P9BE-AIX32-NEXT:    lxv 3, 0(4)
98; P9BE-AIX32-NEXT:    stw 3, -16(1)
99; P9BE-AIX32-NEXT:    lwz 3, L..C1(2) # %const.1
100; P9BE-AIX32-NEXT:    xxmrghw 2, 0, 1
101; P9BE-AIX32-NEXT:    lxv 0, -16(1)
102; P9BE-AIX32-NEXT:    vperm 2, 4, 2, 3
103; P9BE-AIX32-NEXT:    lxv 4, 0(3)
104; P9BE-AIX32-NEXT:    xxmrghw 3, 1, 0
105; P9BE-AIX32-NEXT:    vperm 3, 3, 3, 4
106; P9BE-AIX32-NEXT:    vspltisw 4, 8
107; P9BE-AIX32-NEXT:    vnegw 3, 3
108; P9BE-AIX32-NEXT:    vadduwm 4, 4, 4
109; P9BE-AIX32-NEXT:    vslw 3, 3, 4
110; P9BE-AIX32-NEXT:    vsubuwm 2, 3, 2
111; P9BE-AIX32-NEXT:    xxswapd 0, 2
112; P9BE-AIX32-NEXT:    stxv 0, 0(3)
113; P9BE-AIX32-NEXT:    blr
114entry:
; Base address is %pix2 + sext(%i_pix2); the second address is 4 bytes past it,
; so the two 8-byte loads below overlap by 4 bytes.
115  %idx.ext63 = sext i32 %i_pix2 to i64
116  %add.ptr64 = getelementptr inbounds i8, i8* %pix2, i64 %idx.ext63
117  %arrayidx5.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 4
; First operand: load, reverse the four elements, zero-extend to i32, negate.
118  %0 = bitcast i8* %add.ptr64 to <4 x i16>*
119  %1 = load <4 x i16>, <4 x i16>* %0, align 1
120  %reorder_shuffle117 = shufflevector <4 x i16> %1, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
121  %2 = zext <4 x i16> %reorder_shuffle117 to <4 x i32>
122  %3 = sub nsw <4 x i32> zeroinitializer, %2
; Second operand: same load/reverse/zext/negate sequence on the offset address.
123  %4 = bitcast i8* %arrayidx5.1 to <4 x i16>*
124  %5 = load <4 x i16>, <4 x i16>* %4, align 1
125  %reorder_shuffle115 = shufflevector <4 x i16> %5, <4 x i16> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
126  %6 = zext <4 x i16> %reorder_shuffle115 to <4 x i32>
127  %7 = sub nsw <4 x i32> zeroinitializer, %6
; Combine: (second << 16) + first, followed by add/sub with zero.
128  %8 = shl nsw <4 x i32> %7, <i32 16, i32 16, i32 16, i32 16>
129  %9 = add nsw <4 x i32> %8, %3
130  %10 = sub nsw <4 x i32> %9, zeroinitializer
; Mask indices 2 and 0 pick from the undef first operand; 7 and 5 pick lanes 3
; and 1 of %10.
131  %11 = shufflevector <4 x i32> undef, <4 x i32> %10, <4 x i32> <i32 2, i32 7, i32 0, i32 5>
132  %12 = add nsw <4 x i32> zeroinitializer, %11
; Keeps lanes 0 and 1 of %12; lanes 2 and 3 come from the undef second operand.
133  %13 = shufflevector <4 x i32> %12, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; The store address is undef; only the stored value matters to this test.
134  store <4 x i32> %13, <4 x i32>* undef, align 16
135  ret void
136}
137
; test32 -- same arithmetic/shuffle chain as a <4 x i32> computation, but the
; two source vectors are <4 x i8> (4 bytes each, align 1), loaded from
; %pix2 + sext(%i_pix2) and from 4 bytes past that address.  Presumably the
; "32" in the name refers to the 32-bit width of each vector load -- confirm.
;
; What the expectations below show: all four configurations perform each load
; with lxsiwzx and place the bytes with vperm against a zeroed register
; (xxlxor) using a constant-pool mask.
;
; The expectation lines are autogenerated; regenerate them with the update
; script rather than editing by hand.
138define void @test32(i8* nocapture readonly %pix2, i32 signext %i_pix2) {
139; P9LE-LABEL: test32:
140; P9LE:       # %bb.0: # %entry
141; P9LE-NEXT:    add 5, 3, 4
142; P9LE-NEXT:    lxsiwzx 2, 3, 4
143; P9LE-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
144; P9LE-NEXT:    xxlxor 3, 3, 3
145; P9LE-NEXT:    addi 3, 3, .LCPI1_0@toc@l
146; P9LE-NEXT:    lxv 4, 0(3)
147; P9LE-NEXT:    li 3, 4
148; P9LE-NEXT:    lxsiwzx 5, 5, 3
149; P9LE-NEXT:    vperm 2, 2, 3, 4
150; P9LE-NEXT:    vperm 3, 5, 3, 4
151; P9LE-NEXT:    vspltisw 4, 8
152; P9LE-NEXT:    vnegw 3, 3
153; P9LE-NEXT:    vadduwm 4, 4, 4
154; P9LE-NEXT:    vslw 3, 3, 4
155; P9LE-NEXT:    vsubuwm 2, 3, 2
156; P9LE-NEXT:    xxswapd 0, 2
157; P9LE-NEXT:    stxv 0, 0(3)
158; P9LE-NEXT:    blr
159;
160; P9BE-LABEL: test32:
161; P9BE:       # %bb.0: # %entry
162; P9BE-NEXT:    add 5, 3, 4
163; P9BE-NEXT:    lxsiwzx 2, 3, 4
164; P9BE-NEXT:    addis 3, 2, .LCPI1_0@toc@ha
165; P9BE-NEXT:    xxlxor 3, 3, 3
166; P9BE-NEXT:    addi 3, 3, .LCPI1_0@toc@l
167; P9BE-NEXT:    lxv 4, 0(3)
168; P9BE-NEXT:    li 3, 4
169; P9BE-NEXT:    lxsiwzx 5, 5, 3
170; P9BE-NEXT:    vperm 2, 3, 2, 4
171; P9BE-NEXT:    vperm 3, 3, 5, 4
172; P9BE-NEXT:    vspltisw 4, 8
173; P9BE-NEXT:    vnegw 3, 3
174; P9BE-NEXT:    vadduwm 4, 4, 4
175; P9BE-NEXT:    vslw 3, 3, 4
176; P9BE-NEXT:    vsubuwm 2, 3, 2
177; P9BE-NEXT:    xxswapd 0, 2
178; P9BE-NEXT:    stxv 0, 0(3)
179; P9BE-NEXT:    blr
180;
181; P9BE-AIX-LABEL: test32:
182; P9BE-AIX:       # %bb.0: # %entry
183; P9BE-AIX-NEXT:    add 5, 3, 4
184; P9BE-AIX-NEXT:    lxsiwzx 2, 3, 4
185; P9BE-AIX-NEXT:    ld 3, L..C2(2) # %const.0
186; P9BE-AIX-NEXT:    xxlxor 3, 3, 3
187; P9BE-AIX-NEXT:    lxv 4, 0(3)
188; P9BE-AIX-NEXT:    li 3, 4
189; P9BE-AIX-NEXT:    lxsiwzx 5, 5, 3
190; P9BE-AIX-NEXT:    vperm 2, 3, 2, 4
191; P9BE-AIX-NEXT:    vperm 3, 3, 5, 4
192; P9BE-AIX-NEXT:    vspltisw 4, 8
193; P9BE-AIX-NEXT:    vnegw 3, 3
194; P9BE-AIX-NEXT:    vadduwm 4, 4, 4
195; P9BE-AIX-NEXT:    vslw 3, 3, 4
196; P9BE-AIX-NEXT:    vsubuwm 2, 3, 2
197; P9BE-AIX-NEXT:    xxswapd 0, 2
198; P9BE-AIX-NEXT:    stxv 0, 0(3)
199; P9BE-AIX-NEXT:    blr
200;
201; P9BE-AIX32-LABEL: test32:
202; P9BE-AIX32:       # %bb.0: # %entry
203; P9BE-AIX32-NEXT:    add 5, 3, 4
204; P9BE-AIX32-NEXT:    lxsiwzx 2, 3, 4
205; P9BE-AIX32-NEXT:    lwz 3, L..C2(2) # %const.0
206; P9BE-AIX32-NEXT:    xxlxor 3, 3, 3
207; P9BE-AIX32-NEXT:    lxv 4, 0(3)
208; P9BE-AIX32-NEXT:    li 3, 4
209; P9BE-AIX32-NEXT:    lxsiwzx 5, 5, 3
210; P9BE-AIX32-NEXT:    vperm 2, 3, 2, 4
211; P9BE-AIX32-NEXT:    vperm 3, 3, 5, 4
212; P9BE-AIX32-NEXT:    vspltisw 4, 8
213; P9BE-AIX32-NEXT:    vnegw 3, 3
214; P9BE-AIX32-NEXT:    vadduwm 4, 4, 4
215; P9BE-AIX32-NEXT:    vslw 3, 3, 4
216; P9BE-AIX32-NEXT:    vsubuwm 2, 3, 2
217; P9BE-AIX32-NEXT:    xxswapd 0, 2
218; P9BE-AIX32-NEXT:    stxv 0, 0(3)
219; P9BE-AIX32-NEXT:    blr
220entry:
; Base address is %pix2 + sext(%i_pix2); the second load starts 4 bytes later,
; i.e. directly after the 4 bytes read by the first load.
221  %idx.ext63 = sext i32 %i_pix2 to i64
222  %add.ptr64 = getelementptr inbounds i8, i8* %pix2, i64 %idx.ext63
223  %arrayidx5.1 = getelementptr inbounds i8, i8* %add.ptr64, i64 4
; First operand: load, reverse the four elements, zero-extend to i32, negate.
224  %0 = bitcast i8* %add.ptr64 to <4 x i8>*
225  %1 = load <4 x i8>, <4 x i8>* %0, align 1
226  %reorder_shuffle117 = shufflevector <4 x i8> %1, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
227  %2 = zext <4 x i8> %reorder_shuffle117 to <4 x i32>
228  %3 = sub nsw <4 x i32> zeroinitializer, %2
; Second operand: same load/reverse/zext/negate sequence on the offset address.
229  %4 = bitcast i8* %arrayidx5.1 to <4 x i8>*
230  %5 = load <4 x i8>, <4 x i8>* %4, align 1
231  %reorder_shuffle115 = shufflevector <4 x i8> %5, <4 x i8> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
232  %6 = zext <4 x i8> %reorder_shuffle115 to <4 x i32>
233  %7 = sub nsw <4 x i32> zeroinitializer, %6
; Combine: (second << 16) + first, followed by add/sub with zero.
234  %8 = shl nsw <4 x i32> %7, <i32 16, i32 16, i32 16, i32 16>
235  %9 = add nsw <4 x i32> %8, %3
236  %10 = sub nsw <4 x i32> %9, zeroinitializer
; Mask indices 2 and 0 pick from the undef first operand; 7 and 5 pick lanes 3
; and 1 of %10.
237  %11 = shufflevector <4 x i32> undef, <4 x i32> %10, <4 x i32> <i32 2, i32 7, i32 0, i32 5>
238  %12 = add nsw <4 x i32> zeroinitializer, %11
; Keeps lanes 0 and 1 of %12; lanes 2 and 3 come from the undef second operand.
239  %13 = shufflevector <4 x i32> %12, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; The store address is undef; only the stored value matters to this test.
240  store <4 x i32> %13, <4 x i32>* undef, align 16
241  ret void
242}
243
; test16 -- two scalar i16 loads (element indices %delta and %delta + 8 of
; %sums) are inserted into lanes 2 and 3 of an otherwise-undef <4 x i16>,
; widened to <4 x i32>, run through a short negate/select/add chain, and lane 0
; of the result is compared (signed less-than) against %thresh.  Presumably
; the "16" in the name refers to the 16-bit scalar loads -- confirm.
;
; What the expectations below show: the three 64-bit configurations load each
; halfword directly into a vector register with lxsihzx and extract the
; compared word with vextuwrx (LE) or vextuwlx (BE, AIX); the 32-bit AIX
; configuration instead uses lhzux / lhz, goes through the stack with
; sth + lxv, and reads the result back with stxv + lwz.
;
; The %if.then successor is `unreachable`, which is why each expectation block
; ends at the "%bb.1" label with no instructions after it.
;
; The expectation lines are autogenerated; regenerate them with the update
; script rather than editing by hand.
244define void @test16(i16* nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) {
245; P9LE-LABEL: test16:
246; P9LE:       # %bb.0: # %entry
247; P9LE-NEXT:    sldi 4, 4, 1
248; P9LE-NEXT:    li 7, 16
249; P9LE-NEXT:    add 6, 3, 4
250; P9LE-NEXT:    lxsihzx 4, 3, 4
251; P9LE-NEXT:    addis 3, 2, .LCPI2_0@toc@ha
252; P9LE-NEXT:    lxsihzx 2, 6, 7
253; P9LE-NEXT:    li 6, 0
254; P9LE-NEXT:    addi 3, 3, .LCPI2_0@toc@l
255; P9LE-NEXT:    mtvsrd 3, 6
256; P9LE-NEXT:    vmrghh 4, 3, 4
257; P9LE-NEXT:    vmrghh 2, 3, 2
258; P9LE-NEXT:    vsplth 3, 3, 3
259; P9LE-NEXT:    xxmrglw 3, 4, 3
260; P9LE-NEXT:    lxv 4, 0(3)
261; P9LE-NEXT:    li 3, 0
262; P9LE-NEXT:    vperm 2, 2, 3, 4
263; P9LE-NEXT:    xxspltw 3, 2, 2
264; P9LE-NEXT:    vadduwm 2, 2, 3
265; P9LE-NEXT:    vextuwrx 3, 3, 2
266; P9LE-NEXT:    cmpw 3, 5
267; P9LE-NEXT:    bgelr+ 0
268; P9LE-NEXT:  # %bb.1: # %if.then
269;
270; P9BE-LABEL: test16:
271; P9BE:       # %bb.0: # %entry
272; P9BE-NEXT:    sldi 4, 4, 1
273; P9BE-NEXT:    li 7, 16
274; P9BE-NEXT:    add 6, 3, 4
275; P9BE-NEXT:    lxsihzx 5, 3, 4
276; P9BE-NEXT:    addis 3, 2, .LCPI2_1@toc@ha
277; P9BE-NEXT:    lxsihzx 2, 6, 7
278; P9BE-NEXT:    addis 6, 2, .LCPI2_0@toc@ha
279; P9BE-NEXT:    addi 3, 3, .LCPI2_1@toc@l
280; P9BE-NEXT:    addi 6, 6, .LCPI2_0@toc@l
281; P9BE-NEXT:    lxv 3, 0(6)
282; P9BE-NEXT:    li 6, 0
283; P9BE-NEXT:    mtvsrwz 4, 6
284; P9BE-NEXT:    vperm 2, 4, 2, 3
285; P9BE-NEXT:    vperm 3, 4, 5, 3
286; P9BE-NEXT:    vsplth 4, 4, 3
287; P9BE-NEXT:    xxmrghw 3, 4, 3
288; P9BE-NEXT:    lxv 4, 0(3)
289; P9BE-NEXT:    li 3, 0
290; P9BE-NEXT:    vperm 2, 3, 2, 4
291; P9BE-NEXT:    xxspltw 3, 2, 1
292; P9BE-NEXT:    vadduwm 2, 2, 3
293; P9BE-NEXT:    vextuwlx 3, 3, 2
294; P9BE-NEXT:    cmpw 3, 5
295; P9BE-NEXT:    bgelr+ 0
296; P9BE-NEXT:  # %bb.1: # %if.then
297;
298; P9BE-AIX-LABEL: test16:
299; P9BE-AIX:       # %bb.0: # %entry
300; P9BE-AIX-NEXT:    sldi 4, 4, 1
301; P9BE-AIX-NEXT:    li 7, 16
302; P9BE-AIX-NEXT:    add 6, 3, 4
303; P9BE-AIX-NEXT:    lxsihzx 5, 3, 4
304; P9BE-AIX-NEXT:    ld 3, L..C3(2) # %const.1
305; P9BE-AIX-NEXT:    lxsihzx 2, 6, 7
306; P9BE-AIX-NEXT:    ld 6, L..C4(2) # %const.0
307; P9BE-AIX-NEXT:    lxv 3, 0(6)
308; P9BE-AIX-NEXT:    li 6, 0
309; P9BE-AIX-NEXT:    mtvsrwz 4, 6
310; P9BE-AIX-NEXT:    vperm 2, 4, 2, 3
311; P9BE-AIX-NEXT:    vperm 3, 4, 5, 3
312; P9BE-AIX-NEXT:    vsplth 4, 4, 3
313; P9BE-AIX-NEXT:    xxmrghw 3, 4, 3
314; P9BE-AIX-NEXT:    lxv 4, 0(3)
315; P9BE-AIX-NEXT:    li 3, 0
316; P9BE-AIX-NEXT:    vperm 2, 3, 2, 4
317; P9BE-AIX-NEXT:    xxspltw 3, 2, 1
318; P9BE-AIX-NEXT:    vadduwm 2, 2, 3
319; P9BE-AIX-NEXT:    vextuwlx 3, 3, 2
320; P9BE-AIX-NEXT:    cmpw 3, 5
321; P9BE-AIX-NEXT:    bgelr+ 0
322; P9BE-AIX-NEXT:  # %bb.1: # %if.then
323;
324; P9BE-AIX32-LABEL: test16:
325; P9BE-AIX32:       # %bb.0: # %entry
326; P9BE-AIX32-NEXT:    slwi 4, 4, 1
327; P9BE-AIX32-NEXT:    li 6, 0
328; P9BE-AIX32-NEXT:    lhzux 4, 3, 4
329; P9BE-AIX32-NEXT:    lhz 3, 16(3)
330; P9BE-AIX32-NEXT:    sth 6, -64(1)
331; P9BE-AIX32-NEXT:    lxv 2, -64(1)
332; P9BE-AIX32-NEXT:    sth 4, -48(1)
333; P9BE-AIX32-NEXT:    lxv 4, -48(1)
334; P9BE-AIX32-NEXT:    sth 3, -32(1)
335; P9BE-AIX32-NEXT:    lwz 3, L..C3(2) # %const.0
336; P9BE-AIX32-NEXT:    lxv 3, -32(1)
337; P9BE-AIX32-NEXT:    vmrghh 4, 2, 4
338; P9BE-AIX32-NEXT:    vmrghh 3, 2, 3
339; P9BE-AIX32-NEXT:    vsplth 2, 2, 0
340; P9BE-AIX32-NEXT:    xxmrghw 2, 2, 4
341; P9BE-AIX32-NEXT:    lxv 4, 0(3)
342; P9BE-AIX32-NEXT:    vperm 2, 2, 3, 4
343; P9BE-AIX32-NEXT:    xxspltw 3, 2, 1
344; P9BE-AIX32-NEXT:    vadduwm 2, 2, 3
345; P9BE-AIX32-NEXT:    stxv 2, -16(1)
346; P9BE-AIX32-NEXT:    lwz 3, -16(1)
347; P9BE-AIX32-NEXT:    cmpw 3, 5
348; P9BE-AIX32-NEXT:    bgelr+ 0
349; P9BE-AIX32-NEXT:  # %bb.1: # %if.then
350entry:
; Element indices for the two loads: %delta and %delta + 8, sign-extended.
351  %idxprom = sext i32 %delta to i64
352  %add14 = add nsw i32 %delta, 8
353  %idxprom15 = sext i32 %add14 to i64
354  br label %for.body
355
356for.body:                                         ; preds = %entry
; Two scalar halfword loads from %sums.
357  %arrayidx8 = getelementptr inbounds i16, i16* %sums, i64 %idxprom
358  %0 = load i16, i16* %arrayidx8, align 2
359  %arrayidx16 = getelementptr inbounds i16, i16* %sums, i64 %idxprom15
360  %1 = load i16, i16* %arrayidx16, align 2
; Build the vector: lanes 0 and 1 stay undef, lanes 2 and 3 hold the loads.
361  %2 = insertelement <4 x i16> undef, i16 %0, i32 2
362  %3 = insertelement <4 x i16> %2, i16 %1, i32 3
363  %4 = zext <4 x i16> %3 to <4 x i32>
; %5 is the negated vector, %6 negates it again; the select condition is undef.
364  %5 = sub nsw <4 x i32> zeroinitializer, %4
365  %6 = sub nsw <4 x i32> zeroinitializer, %5
366  %7 = select <4 x i1> undef, <4 x i32> %6, <4 x i32> %5
; Add lane 1 into lane 0 (the other shuffle lanes are undef), then take lane 0.
367  %bin.rdx = add <4 x i32> %7, zeroinitializer
368  %rdx.shuf54 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
369  %bin.rdx55 = add <4 x i32> %bin.rdx, %rdx.shuf54
370  %8 = extractelement <4 x i32> %bin.rdx55, i32 0
371  %op.extra = add nuw i32 %8, 0
; Signed compare against %thresh selects between the two successors.
372  %cmp25 = icmp slt i32 %op.extra, %thresh
373  br i1 %cmp25, label %if.then, label %if.end
374
375if.then:                                          ; preds = %for.body
376  unreachable
377
378if.end:                                           ; preds = %for.body
379  ret void
380}
381
; test8 -- two scalar i8 loads (byte indices %delta and %delta + 8 of %sums)
; are inserted into lanes 2 and 3 of an otherwise-undef <4 x i8>, widened to
; <4 x i32>, run through a short negate/select/add chain, and lane 0 of the
; result is compared (signed less-than) against %thresh.  Presumably the "8"
; in the name refers to the 8-bit scalar loads -- confirm.
;
; What the expectations below show: all four configurations load each byte
; directly into a vector register with lxsibzx.  The 64-bit configurations
; extract the compared word with vextuwrx (LE) or vextuwlx (BE, AIX); the
; 32-bit AIX configuration stores the vector with stxv and reloads the word
; with lwz.
;
; The %if.then successor is `unreachable`, which is why each expectation block
; ends at the "%bb.1" label with no instructions after it.
;
; The expectation lines are autogenerated; regenerate them with the update
; script rather than editing by hand.
382define void @test8(i8* nocapture readonly %sums, i32 signext %delta, i32 signext %thresh) {
383; P9LE-LABEL: test8:
384; P9LE:       # %bb.0: # %entry
385; P9LE-NEXT:    add 6, 3, 4
386; P9LE-NEXT:    lxsibzx 2, 3, 4
387; P9LE-NEXT:    li 3, 0
388; P9LE-NEXT:    mtvsrd 3, 3
389; P9LE-NEXT:    li 3, 8
390; P9LE-NEXT:    lxsibzx 5, 6, 3
391; P9LE-NEXT:    vspltb 4, 3, 7
392; P9LE-NEXT:    addis 3, 2, .LCPI3_0@toc@ha
393; P9LE-NEXT:    vmrghb 2, 3, 2
394; P9LE-NEXT:    addi 3, 3, .LCPI3_0@toc@l
395; P9LE-NEXT:    vmrglh 2, 2, 4
396; P9LE-NEXT:    vmrghb 3, 3, 5
397; P9LE-NEXT:    xxmrglw 2, 2, 4
398; P9LE-NEXT:    vmrglh 3, 3, 4
399; P9LE-NEXT:    xxmrglw 3, 4, 3
400; P9LE-NEXT:    lxv 4, 0(3)
401; P9LE-NEXT:    li 3, 0
402; P9LE-NEXT:    vperm 2, 3, 2, 4
403; P9LE-NEXT:    xxspltw 3, 2, 2
404; P9LE-NEXT:    vadduwm 2, 2, 3
405; P9LE-NEXT:    vextuwrx 3, 3, 2
406; P9LE-NEXT:    cmpw 3, 5
407; P9LE-NEXT:    bgelr+ 0
408; P9LE-NEXT:  # %bb.1: # %if.then
409;
410; P9BE-LABEL: test8:
411; P9BE:       # %bb.0: # %entry
412; P9BE-NEXT:    add 6, 3, 4
413; P9BE-NEXT:    li 7, 8
414; P9BE-NEXT:    lxsibzx 5, 3, 4
415; P9BE-NEXT:    addis 3, 2, .LCPI3_1@toc@ha
416; P9BE-NEXT:    lxsibzx 2, 6, 7
417; P9BE-NEXT:    addis 6, 2, .LCPI3_0@toc@ha
418; P9BE-NEXT:    addi 3, 3, .LCPI3_1@toc@l
419; P9BE-NEXT:    addi 6, 6, .LCPI3_0@toc@l
420; P9BE-NEXT:    lxv 3, 0(6)
421; P9BE-NEXT:    li 6, 0
422; P9BE-NEXT:    mtvsrwz 4, 6
423; P9BE-NEXT:    vperm 2, 4, 2, 3
424; P9BE-NEXT:    vperm 3, 4, 5, 3
425; P9BE-NEXT:    vspltb 4, 4, 7
426; P9BE-NEXT:    vmrghh 3, 3, 4
427; P9BE-NEXT:    xxspltw 4, 4, 0
428; P9BE-NEXT:    xxmrghw 2, 3, 2
429; P9BE-NEXT:    lxv 3, 0(3)
430; P9BE-NEXT:    li 3, 0
431; P9BE-NEXT:    vperm 2, 4, 2, 3
432; P9BE-NEXT:    xxspltw 3, 2, 1
433; P9BE-NEXT:    vadduwm 2, 2, 3
434; P9BE-NEXT:    vextuwlx 3, 3, 2
435; P9BE-NEXT:    cmpw 3, 5
436; P9BE-NEXT:    bgelr+ 0
437; P9BE-NEXT:  # %bb.1: # %if.then
438;
439; P9BE-AIX-LABEL: test8:
440; P9BE-AIX:       # %bb.0: # %entry
441; P9BE-AIX-NEXT:    add 6, 3, 4
442; P9BE-AIX-NEXT:    li 7, 8
443; P9BE-AIX-NEXT:    lxsibzx 5, 3, 4
444; P9BE-AIX-NEXT:    ld 3, L..C5(2) # %const.1
445; P9BE-AIX-NEXT:    lxsibzx 2, 6, 7
446; P9BE-AIX-NEXT:    ld 6, L..C6(2) # %const.0
447; P9BE-AIX-NEXT:    lxv 3, 0(6)
448; P9BE-AIX-NEXT:    li 6, 0
449; P9BE-AIX-NEXT:    mtvsrwz 4, 6
450; P9BE-AIX-NEXT:    vperm 2, 4, 2, 3
451; P9BE-AIX-NEXT:    vperm 3, 4, 5, 3
452; P9BE-AIX-NEXT:    vspltb 4, 4, 7
453; P9BE-AIX-NEXT:    vmrghh 3, 3, 4
454; P9BE-AIX-NEXT:    xxspltw 4, 4, 0
455; P9BE-AIX-NEXT:    xxmrghw 2, 3, 2
456; P9BE-AIX-NEXT:    lxv 3, 0(3)
457; P9BE-AIX-NEXT:    li 3, 0
458; P9BE-AIX-NEXT:    vperm 2, 4, 2, 3
459; P9BE-AIX-NEXT:    xxspltw 3, 2, 1
460; P9BE-AIX-NEXT:    vadduwm 2, 2, 3
461; P9BE-AIX-NEXT:    vextuwlx 3, 3, 2
462; P9BE-AIX-NEXT:    cmpw 3, 5
463; P9BE-AIX-NEXT:    bgelr+ 0
464; P9BE-AIX-NEXT:  # %bb.1: # %if.then
465;
466; P9BE-AIX32-LABEL: test8:
467; P9BE-AIX32:       # %bb.0: # %entry
468; P9BE-AIX32-NEXT:    add 6, 3, 4
469; P9BE-AIX32-NEXT:    li 7, 8
470; P9BE-AIX32-NEXT:    lxsibzx 5, 3, 4
471; P9BE-AIX32-NEXT:    lwz 3, L..C4(2) # %const.1
472; P9BE-AIX32-NEXT:    lxsibzx 2, 6, 7
473; P9BE-AIX32-NEXT:    lwz 6, L..C5(2) # %const.0
474; P9BE-AIX32-NEXT:    lxv 3, 0(6)
475; P9BE-AIX32-NEXT:    li 6, 0
476; P9BE-AIX32-NEXT:    mtvsrwz 4, 6
477; P9BE-AIX32-NEXT:    vperm 2, 4, 2, 3
478; P9BE-AIX32-NEXT:    vperm 3, 4, 5, 3
479; P9BE-AIX32-NEXT:    vspltb 4, 4, 7
480; P9BE-AIX32-NEXT:    vmrghh 3, 3, 4
481; P9BE-AIX32-NEXT:    xxspltw 4, 4, 0
482; P9BE-AIX32-NEXT:    xxmrghw 2, 3, 2
483; P9BE-AIX32-NEXT:    lxv 3, 0(3)
484; P9BE-AIX32-NEXT:    vperm 2, 4, 2, 3
485; P9BE-AIX32-NEXT:    xxspltw 3, 2, 1
486; P9BE-AIX32-NEXT:    vadduwm 2, 2, 3
487; P9BE-AIX32-NEXT:    stxv 2, -16(1)
488; P9BE-AIX32-NEXT:    lwz 3, -16(1)
489; P9BE-AIX32-NEXT:    cmpw 3, 5
490; P9BE-AIX32-NEXT:    bgelr+ 0
491; P9BE-AIX32-NEXT:  # %bb.1: # %if.then
492entry:
; Byte indices for the two loads: %delta and %delta + 8, sign-extended.
493  %idxprom = sext i32 %delta to i64
494  %add14 = add nsw i32 %delta, 8
495  %idxprom15 = sext i32 %add14 to i64
496  br label %for.body
497
498for.body:                                         ; preds = %entry
; Two scalar byte loads from %sums (both are annotated `align 2`).
499  %arrayidx8 = getelementptr inbounds i8, i8* %sums, i64 %idxprom
500  %0 = load i8, i8* %arrayidx8, align 2
501  %arrayidx16 = getelementptr inbounds i8, i8* %sums, i64 %idxprom15
502  %1 = load i8, i8* %arrayidx16, align 2
; Build the vector: lanes 0 and 1 stay undef, lanes 2 and 3 hold the loads.
503  %2 = insertelement <4 x i8> undef, i8 %0, i32 2
504  %3 = insertelement <4 x i8> %2, i8 %1, i32 3
505  %4 = zext <4 x i8> %3 to <4 x i32>
; %5 is the negated vector, %6 negates it again; the select condition is undef.
506  %5 = sub nsw <4 x i32> zeroinitializer, %4
507  %6 = sub nsw <4 x i32> zeroinitializer, %5
508  %7 = select <4 x i1> undef, <4 x i32> %6, <4 x i32> %5
; Add lane 1 into lane 0 (the other shuffle lanes are undef), then take lane 0.
509  %bin.rdx = add <4 x i32> %7, zeroinitializer
510  %rdx.shuf54 = shufflevector <4 x i32> %bin.rdx, <4 x i32> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
511  %bin.rdx55 = add <4 x i32> %bin.rdx, %rdx.shuf54
512  %8 = extractelement <4 x i32> %bin.rdx55, i32 0
513  %op.extra = add nuw i32 %8, 0
; Signed compare against %thresh selects between the two successors.
514  %cmp25 = icmp slt i32 %op.extra, %thresh
515  br i1 %cmp25, label %if.then, label %if.end
516
517if.then:                                          ; preds = %for.body
518  unreachable
519
520if.end:                                           ; preds = %for.body
521  ret void
522}
523