; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt -loop-vectorize -dce -instcombine -force-vector-interleave=1 -force-vector-width=8 -S < %s | FileCheck %s

; AND reduction over 256 i8 loads. The accumulator is carried as i32 but is
; masked to its low 8 bits on every iteration and truncated to i8 at the exit.
; The assertions below expect the loop to be vectorized with the reduction
; performed on 8 lanes of i8 and finished by llvm.vector.reduce.and.
define i8 @reduction_and_trunc(i8* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_and_trunc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i8> [ <i8 0, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1, i8 -1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i8, i8* [[PTR:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i8* [[TMP1]] to <8 x i8>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i8>, <8 x i8>* [[TMP2]], align 1
; CHECK-NEXT:    [[TMP3]] = and <8 x i8> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP5:%.*]] = call i8 @llvm.vector.reduce.and.v8i8(<8 x i8> [[TMP3]])
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[AND_LCSSA_OFF0:%.*]] = phi i8 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i8 [[AND_LCSSA_OFF0]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  %sum.02p = phi i32 [ %and, %for.body ], [ 0, %entry ]
  ; Only the low 8 bits of the running value feed the next step.
  %sum.02 = and i32 %sum.02p, 255
  %gep = getelementptr inbounds i8, i8* %ptr, i32 %iv
  %load = load i8, i8* %gep
  %ext = zext i8 %load to i32
  %and = and i32 %sum.02, %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %and to i8
  ret i8 %ret
}

; OR reduction over 256 i16 loads. The accumulator is carried as i32 but is
; masked to its low 16 bits on every iteration and truncated to i16 at the
; exit. The assertions below expect the loop to be vectorized with the
; reduction performed on 8 lanes of i16 and finished by llvm.vector.reduce.or.
define i16 @reduction_or_trunc(i16* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_or_trunc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <8 x i16>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP2]], align 2
; CHECK-NEXT:    [[TMP3]] = or <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP5:%.*]] = call i16 @llvm.vector.reduce.or.v8i16(<8 x i16> [[TMP3]])
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[OR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i16 [[OR_LCSSA_OFF0]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  %sum.02p = phi i32 [ %or, %for.body ], [ 0, %entry ]
  ; Only the low 16 bits of the running value feed the next step.
  %sum.02 = and i32 %sum.02p, 65535
  %gep = getelementptr inbounds i16, i16* %ptr, i32 %iv
  %load = load i16, i16* %gep
  %ext = zext i16 %load to i32
  %or = or i32 %sum.02, %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %or to i16
  ret i16 %ret
}

; XOR reduction over 256 i16 loads. The accumulator is carried as i32 but is
; masked to its low 16 bits on every iteration and truncated to i16 at the
; exit. The assertions below expect the loop to be vectorized with the
; reduction performed on 8 lanes of i16 and finished by llvm.vector.reduce.xor.
define i16 @reduction_xor_trunc(i16* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_xor_trunc(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <8 x i16> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[PTR:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <8 x i16>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <8 x i16>, <8 x i16>* [[TMP2]], align 2
; CHECK-NEXT:    [[TMP3]] = xor <8 x i16> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP5:%.*]] = call i16 @llvm.vector.reduce.xor.v8i16(<8 x i16> [[TMP3]])
; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    br i1 poison, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
; CHECK:       for.end:
; CHECK-NEXT:    [[XOR_LCSSA_OFF0:%.*]] = phi i16 [ poison, [[FOR_BODY]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    ret i16 [[XOR_LCSSA_OFF0]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  %sum.02p = phi i32 [ %xor, %for.body ], [ 0, %entry ]
  ; Only the low 16 bits of the running value feed the next step.
  %sum.02 = and i32 %sum.02p, 65535
  %gep = getelementptr inbounds i16, i16* %ptr, i32 %iv
  %load = load i16, i16* %gep
  %ext = zext i16 %load to i32
  %xor = xor i32 %sum.02, %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %xor to i16
  ret i16 %ret
}

; Negative test: signed-min recurrence (icmp slt + select) over 256 i8 loads,
; with the running value masked to 8 bits each iteration and truncated to i8
; at the exit. The assertions require that neither a vector.body block nor any
; 8-wide vector type appears before the function's ret.
; NOTE(review): the reason the vectorizer must reject this loop is not stated
; in this file - confirm against the pass before relying on it.
define i8 @reduction_smin_trunc(i8* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_smin_trunc(
; CHECK-NOT: vector.body
; CHECK-NOT: <8 x
; CHECK: ret
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  ; The start value 256 has no bits inside the 255 mask below, so the first
  ; comparison sees 0.
  %sum.02p = phi i32 [ %min, %for.body ], [ 256, %entry ]
  %sum.02 = and i32 %sum.02p, 255
  %gep = getelementptr inbounds i8, i8* %ptr, i32 %iv
  %load = load i8, i8* %gep
  ; Sign extension: a negative element sets bits above the 8-bit mask.
  %ext = sext i8 %load to i32
  %icmp = icmp slt i32 %sum.02, %ext
  %min = select i1 %icmp, i32 %sum.02, i32 %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %min to i8
  ret i8 %ret
}

; Negative test: unsigned-min recurrence (icmp ult + select) over 256 i8 loads,
; with the running value masked to 8 bits each iteration and truncated to i8
; at the exit. The assertions require that neither a vector.body block nor any
; 8-wide vector type appears before the function's ret.
; NOTE(review): the reason the vectorizer must reject this loop is not stated
; in this file - confirm against the pass before relying on it.
define i8 @reduction_umin_trunc(i8* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_umin_trunc(
; CHECK-NOT: vector.body
; CHECK-NOT: <8 x
; CHECK: ret
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  %sum.02p = phi i32 [ %min, %for.body ], [ 0, %entry ]
  ; Only the low 8 bits of the running value take part in the comparison.
  %sum.02 = and i32 %sum.02p, 255
  %gep = getelementptr inbounds i8, i8* %ptr, i32 %iv
  %load = load i8, i8* %gep
  %ext = zext i8 %load to i32
  %icmp = icmp ult i32 %sum.02, %ext
  %min = select i1 %icmp, i32 %sum.02, i32 %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %min to i8
  ret i8 %ret
}

; Negative test: signed-max recurrence (icmp sgt + select) over 256 i16 loads,
; with the running value masked to 16 bits each iteration and truncated to i16
; at the exit. The assertions require that neither a vector.body block nor any
; 8-wide vector type appears before the function's ret.
; NOTE(review): the reason the vectorizer must reject this loop is not stated
; in this file - confirm against the pass before relying on it.
define i16 @reduction_smax_trunc(i16* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_smax_trunc(
; CHECK-NOT: vector.body
; CHECK-NOT: <8 x
; CHECK: ret
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  %sum.02p = phi i32 [ %max, %for.body ], [ 0, %entry ]
  ; Only the low 16 bits of the running value take part in the comparison.
  %sum.02 = and i32 %sum.02p, 65535
  %gep = getelementptr inbounds i16, i16* %ptr, i32 %iv
  %load = load i16, i16* %gep
  ; Sign extension: a negative element sets bits above the 16-bit mask.
  %ext = sext i16 %load to i32
  %icmp = icmp sgt i32 %sum.02, %ext
  %max = select i1 %icmp, i32 %sum.02, i32 %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %max to i16
  ret i16 %ret
}

; Negative test: unsigned-max recurrence (icmp ugt + select) over 256 i16
; loads, with the running value masked to 16 bits each iteration and truncated
; to i16 at the exit. The assertions require that neither a vector.body block
; nor any 8-wide vector type appears before the function's ret.
; NOTE(review): the reason the vectorizer must reject this loop is not stated
; in this file - confirm against the pass before relying on it.
define i16 @reduction_umax_trunc(i16* noalias nocapture %ptr) {
; CHECK-LABEL: @reduction_umax_trunc(
; CHECK-NOT: vector.body
; CHECK-NOT: <8 x
; CHECK: ret
entry:
  br label %for.body

for.body:
  %iv = phi i32 [ %iv.next, %for.body ], [ 0, %entry ]
  %sum.02p = phi i32 [ %max, %for.body ], [ 0, %entry ]
  ; Only the low 16 bits of the running value take part in the comparison.
  %sum.02 = and i32 %sum.02p, 65535
  %gep = getelementptr inbounds i16, i16* %ptr, i32 %iv
  %load = load i16, i16* %gep
  %ext = zext i16 %load to i32
  %icmp = icmp ugt i32 %sum.02, %ext
  %max = select i1 %icmp, i32 %sum.02, i32 %ext
  %iv.next = add i32 %iv, 1
  %exitcond = icmp eq i32 %iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  %ret = trunc i32 %max to i16
  ret i16 %ret
}
