1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
3; RUN: opt -S -loop-vectorize -force-vector-width=2  -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s
4
; Data layout used by every function below: little-endian (e), Mach-O name
; mangling (m:o), 64-bit aligned i64, 128-bit aligned f80, native integer
; widths of 8/16/32/64 bits, and a 128-bit natural stack alignment (S128).
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6
; Single-block loop: for.cond is both the header and the latch, and the exit
; test sits after the store, so the store executes at least once.
; The default-run assertions expect a <2 x i16> vector store in vector.body,
; with a scalar loop reached through scalar.ph for any remainder.
; The tail-folding run's assertions expect per-lane predicated scalar stores
; (pred.store.if / pred.store.if1) guarded by an icmp ule against
; trip-count-minus-1, and a middle.block that branches on constant true to
; if.end.
7define void @bottom_tested(i16* %p, i32 %n) {
8; CHECK-LABEL: @bottom_tested(
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
11; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
12; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1
13; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
14; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
15; CHECK:       vector.ph:
16; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
17; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
18; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
19; CHECK:       vector.body:
20; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
21; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
22; CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[TMP2]] to i64
23; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP3]]
24; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0
25; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>*
26; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP6]], align 4
27; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
28; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
29; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
30; CHECK:       middle.block:
31; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
32; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
33; CHECK:       scalar.ph:
34; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
35; CHECK-NEXT:    br label [[FOR_COND:%.*]]
36; CHECK:       for.cond:
37; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
38; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
39; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
40; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
41; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
42; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
43; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]]
44; CHECK:       if.end:
45; CHECK-NEXT:    ret void
46;
47; TAILFOLD-LABEL: @bottom_tested(
48; TAILFOLD-NEXT:  entry:
49; TAILFOLD-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
50; TAILFOLD-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
51; TAILFOLD-NEXT:    [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1
52; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
53; TAILFOLD:       vector.ph:
54; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
55; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
56; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
57; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
58; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
59; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
60; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
61; TAILFOLD:       vector.body:
62; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
63; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
64; TAILFOLD-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
65; TAILFOLD-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 1
66; TAILFOLD-NEXT:    [[TMP4:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
67; TAILFOLD-NEXT:    [[TMP5:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
68; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
69; TAILFOLD-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
70; TAILFOLD:       pred.store.if:
71; TAILFOLD-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
72; TAILFOLD-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
73; TAILFOLD-NEXT:    store i16 0, i16* [[TMP8]], align 4
74; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
75; TAILFOLD:       pred.store.continue:
76; TAILFOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
77; TAILFOLD-NEXT:    br i1 [[TMP9]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
78; TAILFOLD:       pred.store.if1:
79; TAILFOLD-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
80; TAILFOLD-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP10]]
81; TAILFOLD-NEXT:    store i16 0, i16* [[TMP11]], align 4
82; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
83; TAILFOLD:       pred.store.continue2:
84; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
85; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
86; TAILFOLD-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
87; TAILFOLD-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
88; TAILFOLD:       middle.block:
89; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
90; TAILFOLD:       scalar.ph:
91; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
92; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
93; TAILFOLD:       for.cond:
94; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
95; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
96; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
97; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
98; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
99; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
100; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]]
101; TAILFOLD:       if.end:
102; TAILFOLD-NEXT:    ret void
103;
104entry:
105  br label %for.cond
106
107for.cond:
  ; %i is the i32 induction variable: 0 on entry, %inc on the back edge.
108  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
  ; Sign-extend the index to i64 and store a zero i16 at p[i].
109  %iprom = sext i32 %i to i64
110  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
111  store i16 0, i16* %b, align 4
112  %inc = add nsw i32 %i, 1
  ; The exit test reads the pre-increment %i (not %inc) and comes after the
  ; store: the loop repeats while %i is signed-less-than %n.
113  %cmp = icmp slt i32 %i, %n
114  br i1 %cmp, label %for.cond, label %if.end
115
116if.end:
117  ret void
118}
119
; Top-tested loop: the only exit is taken from the header for.cond, before the
; store in for.body; the latch for.body branches back unconditionally.
; The default-run assertions expect a vectorized loop whose n.vec subtracts a
; full 2 (instead of 0) when the trip count is a multiple of 2, so n.vec is
; always smaller than the trip count and iterations remain for the scalar loop.
; The tail-folding run's assertions contain no vector blocks: the loop is
; expected to stay scalar.
120define void @early_exit(i16* %p, i32 %n) {
121; CHECK-LABEL: @early_exit(
122; CHECK-NEXT:  entry:
123; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
124; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
125; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1
126; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP1]], 2
127; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
128; CHECK:       vector.ph:
129; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
130; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
131; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 2, i32 [[N_MOD_VF]]
132; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[TMP3]]
133; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
134; CHECK:       vector.body:
135; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
136; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
137; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 0
138; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 1
139; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP4]] to i64
140; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]]
141; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP7]], i32 0
142; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <2 x i16>*
143; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP9]], align 4
144; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
145; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
146; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
147; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
148; CHECK:       middle.block:
149; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
150; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
151; CHECK:       scalar.ph:
152; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
153; CHECK-NEXT:    br label [[FOR_COND:%.*]]
154; CHECK:       for.cond:
155; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
156; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
157; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
158; CHECK:       for.body:
159; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
160; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
161; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
162; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
163; CHECK-NEXT:    br label [[FOR_COND]], [[LOOP5:!llvm.loop !.*]]
164; CHECK:       if.end:
165; CHECK-NEXT:    ret void
166;
167; TAILFOLD-LABEL: @early_exit(
168; TAILFOLD-NEXT:  entry:
169; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
170; TAILFOLD:       for.cond:
171; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
172; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
173; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
174; TAILFOLD:       for.body:
175; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
176; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
177; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
178; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
179; TAILFOLD-NEXT:    br label [[FOR_COND]]
180; TAILFOLD:       if.end:
181; TAILFOLD-NEXT:    ret void
182;
183entry:
184  br label %for.cond
185
186for.cond:
  ; Header: the exit test runs before any store, so zero iterations of the
  ; store are possible when %i is not signed-less-than %n on entry.
187  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
188  %cmp = icmp slt i32 %i, %n
189  br i1 %cmp, label %for.body, label %if.end
190
191for.body:
  ; Latch: store a zero i16 at p[i], bump the index, and loop back
  ; unconditionally (this block has no exit edge).
192  %iprom = sext i32 %i to i64
193  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
194  store i16 0, i16* %b, align 4
195  %inc = add nsw i32 %i, 1
196  br label %for.cond
197
198if.end:
199  ret void
200}
201
202; Same as early_exit, but with optsize to prevent the use of
203; a scalar epilogue.  -- Can't vectorize this in either case.
; The loop body is the same IR as @early_exit; the only difference is the
; optsize function attribute. Neither run's assertions contain vector.ph or
; vector.body, i.e. the loop is expected to be left scalar under both RUN lines.
204define void @optsize(i16* %p, i32 %n) optsize {
205; CHECK-LABEL: @optsize(
206; CHECK-NEXT:  entry:
207; CHECK-NEXT:    br label [[FOR_COND:%.*]]
208; CHECK:       for.cond:
209; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
210; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
211; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
212; CHECK:       for.body:
213; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
214; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
215; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
216; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
217; CHECK-NEXT:    br label [[FOR_COND]]
218; CHECK:       if.end:
219; CHECK-NEXT:    ret void
220;
221; TAILFOLD-LABEL: @optsize(
222; TAILFOLD-NEXT:  entry:
223; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
224; TAILFOLD:       for.cond:
225; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
226; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
227; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
228; TAILFOLD:       for.body:
229; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
230; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
231; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
232; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
233; TAILFOLD-NEXT:    br label [[FOR_COND]]
234; TAILFOLD:       if.end:
235; TAILFOLD-NEXT:    ret void
236;
237entry:
238  br label %for.cond
239
240for.cond:
  ; Header holds the only exit edge; it is taken before the store.
241  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
242  %cmp = icmp slt i32 %i, %n
243  br i1 %cmp, label %for.body, label %if.end
244
245for.body:
  ; Store a zero i16 at p[i], then branch back unconditionally.
246  %iprom = sext i32 %i to i64
247  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
248  store i16 0, i16* %b, align 4
249  %inc = add nsw i32 %i, 1
250  br label %for.cond
251
252if.end:
253  ret void
254}
255
256
257; multiple exit - no values inside the loop used outside
; Two exiting blocks, one exit block: for.cond leaves when %i is not below %n,
; for.body leaves when %i is not below 2096, and both edges target if.end.
; Nothing computed in the loop is used after it.
; The default-run assertions expect a vectorized loop whose trip count is
; umin(smax(n, 0), 2096) + 1, with n.vec reduced by a full 2 when the
; remainder is zero. The tail-folding run's assertions contain no vector
; blocks.
258define void @multiple_unique_exit(i16* %p, i32 %n) {
259; CHECK-LABEL: @multiple_unique_exit(
260; CHECK-NEXT:  entry:
261; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
262; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
263; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096
264; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096
265; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1
266; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2
267; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
268; CHECK:       vector.ph:
269; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
270; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
271; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]]
272; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]]
273; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
274; CHECK:       vector.body:
275; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
276; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
277; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 0
278; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
279; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP5]] to i64
280; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
281; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
282; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>*
283; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4
284; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
285; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
286; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
287; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
288; CHECK:       middle.block:
289; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
290; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
291; CHECK:       scalar.ph:
292; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
293; CHECK-NEXT:    br label [[FOR_COND:%.*]]
294; CHECK:       for.cond:
295; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
296; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
297; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
298; CHECK:       for.body:
299; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
300; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
301; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
302; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
303; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
304; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP7:!llvm.loop !.*]]
305; CHECK:       if.end:
306; CHECK-NEXT:    ret void
307;
308; TAILFOLD-LABEL: @multiple_unique_exit(
309; TAILFOLD-NEXT:  entry:
310; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
311; TAILFOLD:       for.cond:
312; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
313; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
314; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
315; TAILFOLD:       for.body:
316; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
317; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
318; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
319; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
320; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
321; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
322; TAILFOLD:       if.end:
323; TAILFOLD-NEXT:    ret void
324;
325entry:
326  br label %for.cond
327
328for.cond:
  ; First exiting block: leave for if.end when %i is not below %n.
329  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
330  %cmp = icmp slt i32 %i, %n
331  br i1 %cmp, label %for.body, label %if.end
332
333for.body:
334  %iprom = sext i32 %i to i64
335  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
336  store i16 0, i16* %b, align 4
337  %inc = add nsw i32 %i, 1
  ; Second exiting block (also the latch): the compare reads the
  ; pre-increment %i against the constant 2096 and exits to the same if.end.
338  %cmp2 = icmp slt i32 %i, 2096
339  br i1 %cmp2, label %for.cond, label %if.end
340
341if.end:
342  ret void
343}
344
345; multiple exit - with an lcssa phi
; Same control flow as @multiple_unique_exit, but the function returns %i, so
; the induction variable is live out of the loop on both exiting edges.
; The default-run assertions expect vectorization; in the expected output the
; if.end phi gains a third incoming value from middle.block, computed there as
; n.vec - 1. The tail-folding run's assertions contain no vector blocks and
; only show the two-entry phi for %i in if.end.
346define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
347; CHECK-LABEL: @multiple_unique_exit2(
348; CHECK-NEXT:  entry:
349; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
350; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
351; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096
352; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096
353; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1
354; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2
355; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
356; CHECK:       vector.ph:
357; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
358; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
359; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]]
360; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]]
361; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
362; CHECK:       vector.body:
363; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
364; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
365; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 0
366; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
367; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP5]] to i64
368; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
369; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
370; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>*
371; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4
372; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
373; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
374; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
375; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
376; CHECK:       middle.block:
377; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
378; CHECK-NEXT:    [[IND_ESCAPE:%.*]] = sub i32 [[N_VEC]], 1
379; CHECK-NEXT:    [[IND_ESCAPE1:%.*]] = sub i32 [[N_VEC]], 1
380; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
381; CHECK:       scalar.ph:
382; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
383; CHECK-NEXT:    br label [[FOR_COND:%.*]]
384; CHECK:       for.cond:
385; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
386; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
387; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
388; CHECK:       for.body:
389; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
390; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
391; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
392; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
393; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
394; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP9:!llvm.loop !.*]]
395; CHECK:       if.end:
396; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ], [ [[IND_ESCAPE1]], [[MIDDLE_BLOCK]] ]
397; CHECK-NEXT:    ret i32 [[I_LCSSA]]
398;
399; TAILFOLD-LABEL: @multiple_unique_exit2(
400; TAILFOLD-NEXT:  entry:
401; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
402; TAILFOLD:       for.cond:
403; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
404; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
405; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
406; TAILFOLD:       for.body:
407; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
408; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
409; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
410; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
411; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
412; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
413; TAILFOLD:       if.end:
414; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
415; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
416;
417entry:
418  br label %for.cond
419
420for.cond:
  ; First exiting block: leave for if.end when %i is not below %n.
421  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
422  %cmp = icmp slt i32 %i, %n
423  br i1 %cmp, label %for.body, label %if.end
424
425for.body:
426  %iprom = sext i32 %i to i64
427  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
428  store i16 0, i16* %b, align 4
429  %inc = add nsw i32 %i, 1
  ; Second exiting block: leave for the same if.end when %i is not below 2096.
430  %cmp2 = icmp slt i32 %i, 2096
431  br i1 %cmp2, label %for.cond, label %if.end
432
433if.end:
  ; %i is defined inside the loop and used directly here, with no phi in the
  ; input; both sets of assertions show a phi for it in this block.
434  ret i32 %i
435}
436
437; multiple exit w/a non lcssa phi
; Same control flow as @multiple_unique_exit, but if.end already contains a phi
; whose incoming values are constants chosen per exiting edge rather than a
; value computed in the loop.
; The default-run assertions expect vectorization and a third phi entry of 0
; for the edge from middle.block. The tail-folding run's assertions contain no
; vector blocks and leave the phi with its two original entries.
438define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
439; CHECK-LABEL: @multiple_unique_exit3(
440; CHECK-NEXT:  entry:
441; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
442; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
443; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096
444; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096
445; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1
446; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2
447; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
448; CHECK:       vector.ph:
449; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
450; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
451; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]]
452; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]]
453; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
454; CHECK:       vector.body:
455; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
456; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
457; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 0
458; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
459; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP5]] to i64
460; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
461; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
462; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>*
463; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4
464; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
465; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
466; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
467; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
468; CHECK:       middle.block:
469; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
470; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
471; CHECK:       scalar.ph:
472; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
473; CHECK-NEXT:    br label [[FOR_COND:%.*]]
474; CHECK:       for.cond:
475; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
476; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
477; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
478; CHECK:       for.body:
479; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
480; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
481; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
482; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
483; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
484; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP11:!llvm.loop !.*]]
485; CHECK:       if.end:
486; CHECK-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ], [ 0, [[MIDDLE_BLOCK]] ]
487; CHECK-NEXT:    ret i32 [[EXIT]]
488;
489; TAILFOLD-LABEL: @multiple_unique_exit3(
490; TAILFOLD-NEXT:  entry:
491; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
492; TAILFOLD:       for.cond:
493; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
494; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
495; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
496; TAILFOLD:       for.body:
497; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
498; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
499; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
500; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
501; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
502; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
503; TAILFOLD:       if.end:
504; TAILFOLD-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
505; TAILFOLD-NEXT:    ret i32 [[EXIT]]
506;
507entry:
508  br label %for.cond
509
510for.cond:
  ; First exiting block: leave for if.end when %i is not below %n.
511  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
512  %cmp = icmp slt i32 %i, %n
513  br i1 %cmp, label %for.body, label %if.end
514
515for.body:
516  %iprom = sext i32 %i to i64
517  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
518  store i16 0, i16* %b, align 4
519  %inc = add nsw i32 %i, 1
  ; Second exiting block: leave for the same if.end when %i is not below 2096.
520  %cmp2 = icmp slt i32 %i, 2096
521  br i1 %cmp2, label %for.cond, label %if.end
522
523if.end:
  ; The result records which edge left the loop: 0 when arriving from
  ; for.cond, 1 when arriving from for.body.
524  %exit = phi i32 [0, %for.cond], [1, %for.body]
525  ret i32 %exit
526}
527
528; multiple exits w/distinct target blocks
; Two exiting blocks with two different exit blocks: for.cond leaves for
; if.end (returns 0) and for.body leaves for if.end2 (returns 1).
; Neither run's assertions contain vector blocks; the expected output has the
; same blocks and instructions as the input.
529define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
530; CHECK-LABEL: @multiple_exit_blocks(
531; CHECK-NEXT:  entry:
532; CHECK-NEXT:    br label [[FOR_COND:%.*]]
533; CHECK:       for.cond:
534; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
535; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
536; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
537; CHECK:       for.body:
538; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
539; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
540; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
541; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
542; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
543; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
544; CHECK:       if.end:
545; CHECK-NEXT:    ret i32 0
546; CHECK:       if.end2:
547; CHECK-NEXT:    ret i32 1
548;
549; TAILFOLD-LABEL: @multiple_exit_blocks(
550; TAILFOLD-NEXT:  entry:
551; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
552; TAILFOLD:       for.cond:
553; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
554; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
555; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
556; TAILFOLD:       for.body:
557; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
558; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
559; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
560; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
561; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
562; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
563; TAILFOLD:       if.end:
564; TAILFOLD-NEXT:    ret i32 0
565; TAILFOLD:       if.end2:
566; TAILFOLD-NEXT:    ret i32 1
567;
568entry:
569  br label %for.cond
570
571for.cond:
  ; Exit to if.end when %i is not below %n.
572  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
573  %cmp = icmp slt i32 %i, %n
574  br i1 %cmp, label %for.body, label %if.end
575
576for.body:
577  %iprom = sext i32 %i to i64
578  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
579  store i16 0, i16* %b, align 4
580  %inc = add nsw i32 %i, 1
  ; Exit to if.end2 (a different block from the header's exit target) when
  ; %i is not below 2096.
581  %cmp2 = icmp slt i32 %i, 2096
582  br i1 %cmp2, label %for.cond, label %if.end2
583
584if.end:
585  ret i32 0
586
587if.end2:
588  ret i32 1
589}
590
591; unique exit case but with a switch as two edges between the same pair of
592; blocks is an often missed edge case
; Single-block loop terminated by a switch: the default edge is the back edge
; and two distinct case values both leave for if.end, so there are two edges
; between the same pair of blocks. %n is not referenced by the body.
; Neither run's assertions contain vector blocks; the only difference from the
; input in the expected output is a phi for %i in if.end with one entry per
; switch edge.
593define i32 @multiple_exit_switch(i16* %p, i32 %n) {
594; CHECK-LABEL: @multiple_exit_switch(
595; CHECK-NEXT:  entry:
596; CHECK-NEXT:    br label [[FOR_COND:%.*]]
597; CHECK:       for.cond:
598; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
599; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
600; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
601; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
602; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
603; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
604; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
605; CHECK-NEXT:    i32 2097, label [[IF_END]]
606; CHECK-NEXT:    ]
607; CHECK:       if.end:
608; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
609; CHECK-NEXT:    ret i32 [[I_LCSSA]]
610;
611; TAILFOLD-LABEL: @multiple_exit_switch(
612; TAILFOLD-NEXT:  entry:
613; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
614; TAILFOLD:       for.cond:
615; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
616; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
617; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
618; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
619; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
620; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
621; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
622; TAILFOLD-NEXT:    i32 2097, label [[IF_END]]
623; TAILFOLD-NEXT:    ]
624; TAILFOLD:       if.end:
625; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
626; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
627;
628entry:
629  br label %for.cond
630
631for.cond:
632  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
633  %iprom = sext i32 %i to i64
634  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
635  store i16 0, i16* %b, align 4
636  %inc = add nsw i32 %i, 1
  ; The switch reads the pre-increment %i. Any value other than 2096 or 2097
  ; takes the default edge back to for.cond; both listed cases go to if.end.
637  switch i32 %i, label %for.cond [
638  i32 2096, label %if.end
639  i32 2097, label %if.end
640  ]
641
642if.end:
  ; %i is defined inside the loop and returned directly here.
643  ret i32 %i
644}
645
646; Multiple exit block case, but with a switch: multiple exiting edges from a
647; single block (for.cond -> if.end and for.cond -> if.end2) is a commonly
; missed edge case.
; Both RUN lines expect the scalar loop to be left in place (no vector.body).
648define i32 @multiple_exit_switch2(i16* %p, i32 %n) {
649; CHECK-LABEL: @multiple_exit_switch2(
650; CHECK-NEXT:  entry:
651; CHECK-NEXT:    br label [[FOR_COND:%.*]]
652; CHECK:       for.cond:
653; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
654; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
655; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
656; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
657; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
658; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
659; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
660; CHECK-NEXT:    i32 2097, label [[IF_END2:%.*]]
661; CHECK-NEXT:    ]
662; CHECK:       if.end:
663; CHECK-NEXT:    ret i32 0
664; CHECK:       if.end2:
665; CHECK-NEXT:    ret i32 1
666;
667; TAILFOLD-LABEL: @multiple_exit_switch2(
668; TAILFOLD-NEXT:  entry:
669; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
670; TAILFOLD:       for.cond:
671; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
672; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
673; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
674; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
675; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
676; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
677; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
678; TAILFOLD-NEXT:    i32 2097, label [[IF_END2:%.*]]
679; TAILFOLD-NEXT:    ]
680; TAILFOLD:       if.end:
681; TAILFOLD-NEXT:    ret i32 0
682; TAILFOLD:       if.end2:
683; TAILFOLD-NEXT:    ret i32 1
684;
685entry:
686  br label %for.cond
687
  ; Single-block loop: the header is also the latch and the only exiting block.
688for.cond:
689  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
690  %iprom = sext i32 %i to i64
691  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  ; p[i] = 0
692  store i16 0, i16* %b, align 4
693  %inc = add nsw i32 %i, 1
  ; The default destination is the back edge; each case value leaves the loop
  ; for a different exit block.
694  switch i32 %i, label %for.cond [
695  i32 2096, label %if.end
696  i32 2097, label %if.end2
697  ]
698
  ; The two exit blocks return different constants.
699if.end:
700  ret i32 0
701
702if.end2:
703  ret i32 1
704}
705
; Loop in which both for.body and for.second branch to a shared
; for.body.backedge block; that block is the header's only in-loop predecessor.
; Both RUN lines expect the scalar loop to be left in place (no vector.body).
706define i32 @multiple_latch1(i16* %p) {
707; CHECK-LABEL: @multiple_latch1(
708; CHECK-NEXT:  entry:
709; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
710; CHECK:       for.body:
711; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
712; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
713; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
714; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
715; CHECK:       for.second:
716; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
717; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
718; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
719; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
720; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
721; CHECK:       for.body.backedge:
722; CHECK-NEXT:    br label [[FOR_BODY]]
723; CHECK:       for.end:
724; CHECK-NEXT:    ret i32 0
725;
726; TAILFOLD-LABEL: @multiple_latch1(
727; TAILFOLD-NEXT:  entry:
728; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
729; TAILFOLD:       for.body:
730; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
731; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
732; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
733; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
734; TAILFOLD:       for.second:
735; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
736; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
737; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
738; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
739; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
740; TAILFOLD:       for.body.backedge:
741; TAILFOLD-NEXT:    br label [[FOR_BODY]]
742; TAILFOLD:       for.end:
743; TAILFOLD-NEXT:    ret i32 0
744;
745entry:
746  br label %for.body
747
748for.body:
749  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge]
750  %inc = add nsw i32 %i.02, 1
  ; First route to the backedge block: taken while %inc < 16 (signed).
751  %cmp = icmp slt i32 %inc, 16
752  br i1 %cmp, label %for.body.backedge, label %for.second
753
754for.second:
755  %iprom = sext i32 %i.02 to i64
756  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  ; p[i] = 0; only executed on iterations that reach for.second.
757  store i16 0, i16* %b, align 4
  ; Second route to the backedge block: taken when %inc > 16 (signed);
  ; otherwise the loop exits to for.end.
758  %cmps = icmp sgt i32 %inc, 16
759  br i1 %cmps, label %for.body.backedge, label %for.end
760
  ; Shared block through which both in-loop branches return to the header.
761for.body.backedge:
762  br label %for.body
763
764for.end:
765  ret i32 0
766}
767
768
769; Two back branches - loop simplify will convert this to the same form
770; as the previous test before the vectorizer sees it, but show that.
; Both RUN lines expect a scalar loop with a for.body.backedge block in the
; output (no vector.body).
771define i32 @multiple_latch2(i16* %p) {
772; CHECK-LABEL: @multiple_latch2(
773; CHECK-NEXT:  entry:
774; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
775; CHECK:       for.body:
776; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
777; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
778; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
779; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
780; CHECK:       for.body.backedge:
781; CHECK-NEXT:    br label [[FOR_BODY]]
782; CHECK:       for.second:
783; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
784; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
785; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
786; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
787; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
788; CHECK:       for.end:
789; CHECK-NEXT:    ret i32 0
790;
791; TAILFOLD-LABEL: @multiple_latch2(
792; TAILFOLD-NEXT:  entry:
793; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
794; TAILFOLD:       for.body:
795; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
796; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
797; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
798; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
799; TAILFOLD:       for.body.backedge:
800; TAILFOLD-NEXT:    br label [[FOR_BODY]]
801; TAILFOLD:       for.second:
802; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
803; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
804; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
805; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
806; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
807; TAILFOLD:       for.end:
808; TAILFOLD-NEXT:    ret i32 0
809;
810entry:
811  br label %for.body
812
813for.body:
  ; The header phi has two in-loop incoming edges: one from for.body itself
  ; and one from for.second.
814  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
815  %inc = add nsw i32 %i.02, 1
  ; First back edge: taken directly from the header while %inc < 16 (signed).
816  %cmp = icmp slt i32 %inc, 16
817  br i1 %cmp, label %for.body, label %for.second
818
819for.second:
820  %iprom = sext i32 %i.02 to i64
821  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
  ; p[i] = 0; only executed on iterations that reach for.second.
822  store i16 0, i16* %b, align 4
  ; Second back edge: taken when %inc > 16 (signed); otherwise exit to for.end.
823  %cmps = icmp sgt i32 %inc, 16
824  br i1 %cmps, label %for.body, label %for.end
825
826for.end:
827  ret i32 0
828}
829
830
831; Check the interaction between block predication and early exits.  We need
832; the condition on the early exit to remain dead (i.e. not be used when
833; forming the predicate mask).
; The first RUN line expects a vectorized loop with predicated stores; the
; predicate-dont-vectorize RUN line expects the scalar loop to be left in place.
834define void @scalar_predication(float* %addr) {
835; CHECK-LABEL: @scalar_predication(
836; CHECK-NEXT:  entry:
837; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
838; CHECK:       vector.ph:
839; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
840; CHECK:       vector.body:
841; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
842; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
843; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
844; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]]
845; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
846; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
847; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
848; CHECK-NEXT:    [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
849; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
850; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
851; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
852; CHECK:       pred.store.if:
853; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP1]], align 4
854; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
855; CHECK:       pred.store.continue:
856; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
857; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
858; CHECK:       pred.store.if1:
859; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
860; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP8]]
861; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP9]], align 4
862; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
863; CHECK:       pred.store.continue2:
864; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
865; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
866; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
867; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]]
868; CHECK:       middle.block:
869; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 201, 200
870; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
871; CHECK:       scalar.ph:
872; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
873; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
874; CHECK:       loop.header:
875; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
876; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]]
877; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
878; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_BODY:%.*]]
879; CHECK:       loop.body:
880; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[GEP]], align 4
881; CHECK-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00
882; CHECK-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
883; CHECK:       then:
884; CHECK-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
885; CHECK-NEXT:    br label [[LOOP_LATCH]]
886; CHECK:       loop.latch:
887; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
888; CHECK-NEXT:    br label [[LOOP_HEADER]], [[LOOP13:!llvm.loop !.*]]
889; CHECK:       exit:
890; CHECK-NEXT:    ret void
891;
892; TAILFOLD-LABEL: @scalar_predication(
893; TAILFOLD-NEXT:  entry:
894; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
895; TAILFOLD:       loop.header:
896; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
897; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]]
898; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
899; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
900; TAILFOLD:       loop.body:
901; TAILFOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[GEP]], align 4
902; TAILFOLD-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
903; TAILFOLD-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
904; TAILFOLD:       then:
905; TAILFOLD-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
906; TAILFOLD-NEXT:    br label [[LOOP_LATCH]]
907; TAILFOLD:       loop.latch:
908; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
909; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
910; TAILFOLD:       exit:
911; TAILFOLD-NEXT:    ret void
912;
913entry:
914  br label %loop.header
915
916loop.header:
917  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
918  %gep = getelementptr float, float* %addr, i64 %iv
  ; Early exit: the loop's only exit is taken from the header when %iv == 200.
919  %exitcond.not = icmp eq i64 %iv, 200
920  br i1 %exitcond.not, label %exit, label %loop.body
921
922loop.body:
923  %0 = load float, float* %gep, align 4
  ; Skip the store when the loaded value compares ordered-equal to 0.0.
924  %pred = fcmp oeq float %0, 0.0
925  br i1 %pred, label %loop.latch, label %then
926
  ; Conditionally executed block: this is the store that needs predication.
927then:
928  store float 10.0, float* %gep, align 4
929  br label %loop.latch
930
931loop.latch:
932  %iv.next = add nuw nsw i64 %iv, 1
  ; The latch branches back unconditionally; it is not an exiting block.
933  br label %loop.header
934
935exit:
936  ret void
937}
938
; Integer add reduction in a loop with two exiting blocks (loop.header and
; loop.latch) that share a single exit block.
; The first RUN line expects a vectorized loop; the predicate-dont-vectorize
; RUN line expects the scalar loop to be left in place.
939define i32 @me_reduction(i32* %addr) {
940; CHECK-LABEL: @me_reduction(
941; CHECK-NEXT:  entry:
942; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
943; CHECK:       vector.ph:
944; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
945; CHECK:       vector.body:
946; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
947; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
948; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
949; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
950; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
951; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]]
952; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0
953; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
954; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
955; CHECK-NEXT:    [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
956; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
957; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
958; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
959; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]]
960; CHECK:       middle.block:
961; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> <i32 1, i32 undef>
962; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <2 x i32> [[TMP5]], [[RDX_SHUF]]
963; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[BIN_RDX]], i32 0
964; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 201, 200
965; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
966; CHECK:       scalar.ph:
967; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
968; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
969; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
970; CHECK:       loop.header:
971; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
972; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
973; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]]
974; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
975; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_LATCH]]
976; CHECK:       loop.latch:
977; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4
978; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]]
979; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
980; CHECK-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
981; CHECK-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], [[LOOP15:!llvm.loop !.*]]
982; CHECK:       exit:
983; CHECK-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
984; CHECK-NEXT:    ret i32 [[LCSSA]]
985;
986; TAILFOLD-LABEL: @me_reduction(
987; TAILFOLD-NEXT:  entry:
988; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
989; TAILFOLD:       loop.header:
990; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
991; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
992; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
993; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
994; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
995; TAILFOLD:       loop.latch:
996; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
997; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
998; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
999; TAILFOLD-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1000; TAILFOLD-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]]
1001; TAILFOLD:       exit:
1002; TAILFOLD-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1003; TAILFOLD-NEXT:    ret i32 [[LCSSA]]
1004;
1005entry:
1006  br label %loop.header
1007
1008loop.header:
1009  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  ; Running sum, starting at 0.
1010  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1011  %gep = getelementptr i32, i32* %addr, i64 %iv
  ; First exit: taken from the header when %iv == 200.
1012  %exitcond.not = icmp eq i64 %iv, 200
1013  br i1 %exitcond.not, label %exit, label %loop.latch
1014
1015loop.latch:
1016  %0 = load i32, i32* %gep, align 4
  ; accum += addr[iv]
1017  %accum.next = add i32 %accum, %0
1018  %iv.next = add nuw nsw i64 %iv, 1
  ; Second exit: taken from the latch when %iv (not %iv.next) == 400.
1019  %exitcond2.not = icmp eq i64 %iv, 400
1020  br i1 %exitcond2.not, label %exit, label %loop.header
1021
1022exit:
  ; Result is the constant 0 when leaving from loop.header and the updated sum
  ; when leaving from loop.latch.
1023  %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch]
1024  ret i32 %lcssa
1025}
1026
1027; TODO: The current definition of reduction is too strict, we can vectorize
1028; this.  There's an analogous single exit case where we extract the N-1
1029; value of the reduction that we can also handle.  If we fix the latter, the
1030; multiple exit case probably falls out.
; For now, both RUN lines expect the scalar loop to be left in place
; (no vector.body).
1031define i32 @me_reduction2(i32* %addr) {
1032; CHECK-LABEL: @me_reduction2(
1033; CHECK-NEXT:  entry:
1034; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1035; CHECK:       loop.header:
1036; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1037; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1038; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1039; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1040; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1041; CHECK:       loop.latch:
1042; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1043; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1044; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1045; CHECK-NEXT:    br label [[LOOP_HEADER]]
1046; CHECK:       exit:
1047; CHECK-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1048; CHECK-NEXT:    ret i32 [[ACCUM_LCSSA]]
1049;
1050; TAILFOLD-LABEL: @me_reduction2(
1051; TAILFOLD-NEXT:  entry:
1052; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1053; TAILFOLD:       loop.header:
1054; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1055; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1056; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1057; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1058; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1059; TAILFOLD:       loop.latch:
1060; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1061; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1062; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1063; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1064; TAILFOLD:       exit:
1065; TAILFOLD-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1066; TAILFOLD-NEXT:    ret i32 [[ACCUM_LCSSA]]
1067;
1068entry:
1069  br label %loop.header
1070
1071loop.header:
1072  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  ; Running sum, starting at 0.
1073  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1074  %gep = getelementptr i32, i32* %addr, i64 %iv
  ; The loop's only exit: taken from the header when %iv == 200.
1075  %exitcond.not = icmp eq i64 %iv, 200
1076  br i1 %exitcond.not, label %exit, label %loop.latch
1077
1078loop.latch:
1079  %0 = load i32, i32* %gep, align 4
  ; accum += addr[iv]
1080  %accum.next = add i32 %accum, %0
1081  %iv.next = add nuw nsw i64 %iv, 1
  ; The latch branches back unconditionally; it is not an exiting block.
1082  br label %loop.header
1083
1084exit:
  ; Returns the header phi %accum (the sum before this iteration's add),
  ; not %accum.next.
1085  ret i32 %accum
1086}
1087
1088