1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
3; RUN: opt -S -loop-vectorize -force-vector-width=2  -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s
4
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6
; Loop made of a single block (for.cond) whose only exit test sits at the
; bottom: p[i] is stored before %i is compared against %n.
; Expected output for the first RUN line: a <2 x i16> vector loop, with the
; original loop kept as a scalar remainder; the trip count is computed as
; smax(n, 0) + 1.
; Expected output for the second RUN line (predicate-dont-vectorize): a vector
; loop whose stores are predicated per lane, and a middle.block that branches
; on constant true to if.end.
7define void @bottom_tested(i16* %p, i32 %n) {
8; CHECK-LABEL: @bottom_tested(
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
11; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
12; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
13; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
14; CHECK:       vector.ph:
15; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
16; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
17; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
18; CHECK:       vector.body:
19; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
20; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
21; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
22; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]]
23; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
24; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>*
25; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4
26; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
27; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
28; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
29; CHECK:       middle.block:
30; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
31; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
32; CHECK:       scalar.ph:
33; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
34; CHECK-NEXT:    br label [[FOR_COND:%.*]]
35; CHECK:       for.cond:
36; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
37; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
38; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
39; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
40; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
41; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
42; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
43; CHECK:       if.end:
44; CHECK-NEXT:    ret void
45;
46; TAILFOLD-LABEL: @bottom_tested(
47; TAILFOLD-NEXT:  entry:
48; TAILFOLD-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
49; TAILFOLD-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
50; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
51; TAILFOLD:       vector.ph:
52; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1
53; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
54; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
55; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1
56; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
57; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
58; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
59; TAILFOLD:       vector.body:
60; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
61; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
62; TAILFOLD-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
63; TAILFOLD-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 1
64; TAILFOLD-NEXT:    [[TMP3:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
65; TAILFOLD-NEXT:    [[TMP4:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
66; TAILFOLD-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
67; TAILFOLD-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
68; TAILFOLD:       pred.store.if:
69; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
70; TAILFOLD-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]]
71; TAILFOLD-NEXT:    store i16 0, i16* [[TMP7]], align 4
72; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
73; TAILFOLD:       pred.store.continue:
74; TAILFOLD-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
75; TAILFOLD-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
76; TAILFOLD:       pred.store.if1:
77; TAILFOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
78; TAILFOLD-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP9]]
79; TAILFOLD-NEXT:    store i16 0, i16* [[TMP10]], align 4
80; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
81; TAILFOLD:       pred.store.continue2:
82; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
83; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
84; TAILFOLD-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
85; TAILFOLD-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
86; TAILFOLD:       middle.block:
87; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
88; TAILFOLD:       scalar.ph:
89; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
90; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
91; TAILFOLD:       for.cond:
92; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
93; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
94; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
95; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
96; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
97; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
98; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
99; TAILFOLD:       if.end:
100; TAILFOLD-NEXT:    ret void
101;
102entry:
103  br label %for.cond
104
; for.cond is both the loop header and the latch: it branches back to itself.
105for.cond:
106  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
107  %iprom = sext i32 %i to i64
108  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
109  store i16 0, i16* %b, align 4
110  %inc = add nsw i32 %i, 1
; The exit test uses the pre-increment value %i, not %inc.
111  %cmp = icmp slt i32 %i, %n
112  br i1 %cmp, label %for.cond, label %if.end
113
114if.end:
115  ret void
116}
117
; Loop whose exit test is in the header (for.cond), ahead of the store in
; for.body; the latch (for.body) branches back unconditionally.
; Expected output for the first RUN line: vectorized with VF 2. n.vec is the
; trip count minus (remainder, or a full 2 when the remainder is zero), and
; middle.block branches unconditionally to scalar.ph.
; Expected output for the second RUN line (predicate-dont-vectorize): the
; original scalar loop, with no vector blocks.
118define void @early_exit(i16* %p, i32 %n) {
119; CHECK-LABEL: @early_exit(
120; CHECK-NEXT:  entry:
121; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
122; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
123; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
124; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
125; CHECK:       vector.ph:
126; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
127; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
128; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
129; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
130; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
131; CHECK:       vector.body:
132; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
133; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
134; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
135; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
136; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
137; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
138; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
139; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
140; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
141; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
142; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
143; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
144; CHECK:       middle.block:
145; CHECK-NEXT:    br label [[SCALAR_PH]]
146; CHECK:       scalar.ph:
147; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
148; CHECK-NEXT:    br label [[FOR_COND:%.*]]
149; CHECK:       for.cond:
150; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
151; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
152; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
153; CHECK:       for.body:
154; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
155; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
156; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
157; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
158; CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
159; CHECK:       if.end:
160; CHECK-NEXT:    ret void
161;
162; TAILFOLD-LABEL: @early_exit(
163; TAILFOLD-NEXT:  entry:
164; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
165; TAILFOLD:       for.cond:
166; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
167; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
168; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
169; TAILFOLD:       for.body:
170; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
171; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
172; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
173; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
174; TAILFOLD-NEXT:    br label [[FOR_COND]]
175; TAILFOLD:       if.end:
176; TAILFOLD-NEXT:    ret void
177;
178entry:
179  br label %for.cond
180
; Header: the only exiting block; leaves the loop once %i >= %n.
181for.cond:
182  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
183  %cmp = icmp slt i32 %i, %n
184  br i1 %cmp, label %for.body, label %if.end
185
; Latch: performs the store and always branches back to the header.
186for.body:
187  %iprom = sext i32 %i to i64
188  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
189  store i16 0, i16* %b, align 4
190  %inc = add nsw i32 %i, 1
191  br label %for.cond
192
193if.end:
194  ret void
195}
196
197; Same as early_exit, but with optsize to prevent the use of
198; a scalar epilogue.  -- Can't vectorize this in either case.
; The loop blocks below match @early_exit instruction for instruction; the only
; difference is the optsize function attribute. The expected output for both
; RUN lines is the original scalar loop (no vector.ph / vector.body blocks).
199define void @optsize(i16* %p, i32 %n) optsize {
200; CHECK-LABEL: @optsize(
201; CHECK-NEXT:  entry:
202; CHECK-NEXT:    br label [[FOR_COND:%.*]]
203; CHECK:       for.cond:
204; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
205; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
206; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
207; CHECK:       for.body:
208; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
209; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
210; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
211; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
212; CHECK-NEXT:    br label [[FOR_COND]]
213; CHECK:       if.end:
214; CHECK-NEXT:    ret void
215;
216; TAILFOLD-LABEL: @optsize(
217; TAILFOLD-NEXT:  entry:
218; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
219; TAILFOLD:       for.cond:
220; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
221; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
222; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
223; TAILFOLD:       for.body:
224; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
225; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
226; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
227; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
228; TAILFOLD-NEXT:    br label [[FOR_COND]]
229; TAILFOLD:       if.end:
230; TAILFOLD-NEXT:    ret void
231;
232entry:
233  br label %for.cond
234
; Header: the only exiting block; leaves the loop once %i >= %n.
235for.cond:
236  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
237  %cmp = icmp slt i32 %i, %n
238  br i1 %cmp, label %for.body, label %if.end
239
; Latch: performs the store and always branches back to the header.
240for.body:
241  %iprom = sext i32 %i to i64
242  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
243  store i16 0, i16* %b, align 4
244  %inc = add nsw i32 %i, 1
245  br label %for.cond
246
247if.end:
248  ret void
249}
250
251
252; multiple exit - no values inside the loop used outside
; Two exiting blocks that share one exit block: for.cond leaves when
; %i >= %n and for.body leaves when %i >= 2096, both branching to if.end.
; The function returns void, so nothing computed in the loop is used after it.
; Expected output for the first RUN line: vectorized with VF 2, using the trip
; count umin(smax(n, 0), 2096) + 1; middle.block branches unconditionally to
; scalar.ph.
; Expected output for the second RUN line (predicate-dont-vectorize): the
; original scalar loop, with no vector blocks.
253define void @multiple_unique_exit(i16* %p, i32 %n) {
254; CHECK-LABEL: @multiple_unique_exit(
255; CHECK-NEXT:  entry:
256; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
257; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
258; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
259; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
260; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
261; CHECK:       vector.ph:
262; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
263; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
264; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
265; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
266; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
267; CHECK:       vector.body:
268; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
269; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
270; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
271; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
272; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
273; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
274; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
275; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
276; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
277; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
278; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
279; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
280; CHECK:       middle.block:
281; CHECK-NEXT:    br label [[SCALAR_PH]]
282; CHECK:       scalar.ph:
283; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
284; CHECK-NEXT:    br label [[FOR_COND:%.*]]
285; CHECK:       for.cond:
286; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
287; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
288; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
289; CHECK:       for.body:
290; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
291; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
292; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
293; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
294; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
295; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP7:![0-9]+]]
296; CHECK:       if.end:
297; CHECK-NEXT:    ret void
298;
299; TAILFOLD-LABEL: @multiple_unique_exit(
300; TAILFOLD-NEXT:  entry:
301; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
302; TAILFOLD:       for.cond:
303; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
304; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
305; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
306; TAILFOLD:       for.body:
307; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
308; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
309; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
310; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
311; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
312; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
313; TAILFOLD:       if.end:
314; TAILFOLD-NEXT:    ret void
315;
316entry:
317  br label %for.cond
318
; First exit: taken from the header once %i >= %n.
319for.cond:
320  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
321  %cmp = icmp slt i32 %i, %n
322  br i1 %cmp, label %for.body, label %if.end
323
; Second exit: taken from the latch once %i >= 2096, after the store.
324for.body:
325  %iprom = sext i32 %i to i64
326  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
327  store i16 0, i16* %b, align 4
328  %inc = add nsw i32 %i, 1
329  %cmp2 = icmp slt i32 %i, 2096
330  br i1 %cmp2, label %for.cond, label %if.end
331
332if.end:
333  ret void
334}
335
336; multiple exit - with an lcssa phi
; Same two-exit loop shape as @multiple_unique_exit, but the induction value %i
; is returned from if.end, so it is live out of the loop. In the expected
; output for both RUN lines this use appears as a phi in if.end with %i
; incoming from each exiting block.
; Expected output for the first RUN line: vectorized with VF 2; middle.block
; branches unconditionally to scalar.ph, so the returned value comes from the
; scalar loop.
; Expected output for the second RUN line (predicate-dont-vectorize): the loop
; is left scalar.
337define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
338; CHECK-LABEL: @multiple_unique_exit2(
339; CHECK-NEXT:  entry:
340; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
341; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
342; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
343; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
344; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
345; CHECK:       vector.ph:
346; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
347; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
348; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
349; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
350; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
351; CHECK:       vector.body:
352; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
353; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
354; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
355; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
356; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
357; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
358; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
359; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
360; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
361; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
362; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
363; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
364; CHECK:       middle.block:
365; CHECK-NEXT:    br label [[SCALAR_PH]]
366; CHECK:       scalar.ph:
367; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
368; CHECK-NEXT:    br label [[FOR_COND:%.*]]
369; CHECK:       for.cond:
370; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
371; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
372; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
373; CHECK:       for.body:
374; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
375; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
376; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
377; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
378; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
379; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
380; CHECK:       if.end:
381; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
382; CHECK-NEXT:    ret i32 [[I_LCSSA]]
383;
384; TAILFOLD-LABEL: @multiple_unique_exit2(
385; TAILFOLD-NEXT:  entry:
386; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
387; TAILFOLD:       for.cond:
388; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
389; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
390; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
391; TAILFOLD:       for.body:
392; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
393; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
394; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
395; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
396; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
397; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
398; TAILFOLD:       if.end:
399; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
400; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
401;
402entry:
403  br label %for.cond
404
; First exit: taken from the header once %i >= %n.
405for.cond:
406  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
407  %cmp = icmp slt i32 %i, %n
408  br i1 %cmp, label %for.body, label %if.end
409
; Second exit: taken from the latch once %i >= 2096, after the store.
410for.body:
411  %iprom = sext i32 %i to i64
412  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
413  store i16 0, i16* %b, align 4
414  %inc = add nsw i32 %i, 1
415  %cmp2 = icmp slt i32 %i, 2096
416  br i1 %cmp2, label %for.cond, label %if.end
417
; %i is defined inside the loop and used here directly, outside of it.
418if.end:
419  ret i32 %i
420}
421
422; multiple exit w/a non lcssa phi
; Same two-exit loop shape as @multiple_unique_exit, but if.end holds a phi
; whose incoming values are constants that differ per exiting block: 0 when
; leaving from for.cond, 1 when leaving from for.body. The return value thus
; records which exit was taken.
; Expected output for the first RUN line: vectorized with VF 2; middle.block
; branches unconditionally to scalar.ph, and the phi in if.end keeps only its
; two scalar-loop predecessors.
; Expected output for the second RUN line (predicate-dont-vectorize): the loop
; is left scalar.
423define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
424; CHECK-LABEL: @multiple_unique_exit3(
425; CHECK-NEXT:  entry:
426; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
427; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
428; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
429; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
430; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
431; CHECK:       vector.ph:
432; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
433; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
434; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
435; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
436; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
437; CHECK:       vector.body:
438; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
439; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
440; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
441; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
442; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
443; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
444; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
445; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
446; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
447; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
448; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
449; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
450; CHECK:       middle.block:
451; CHECK-NEXT:    br label [[SCALAR_PH]]
452; CHECK:       scalar.ph:
453; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
454; CHECK-NEXT:    br label [[FOR_COND:%.*]]
455; CHECK:       for.cond:
456; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
457; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
458; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
459; CHECK:       for.body:
460; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
461; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
462; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
463; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
464; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
465; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]]
466; CHECK:       if.end:
467; CHECK-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
468; CHECK-NEXT:    ret i32 [[EXIT]]
469;
470; TAILFOLD-LABEL: @multiple_unique_exit3(
471; TAILFOLD-NEXT:  entry:
472; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
473; TAILFOLD:       for.cond:
474; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
475; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
476; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
477; TAILFOLD:       for.body:
478; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
479; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
480; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
481; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
482; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
483; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
484; TAILFOLD:       if.end:
485; TAILFOLD-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
486; TAILFOLD-NEXT:    ret i32 [[EXIT]]
487;
488entry:
489  br label %for.cond
490
; First exit: taken from the header once %i >= %n.
491for.cond:
492  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
493  %cmp = icmp slt i32 %i, %n
494  br i1 %cmp, label %for.body, label %if.end
495
; Second exit: taken from the latch once %i >= 2096, after the store.
496for.body:
497  %iprom = sext i32 %i to i64
498  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
499  store i16 0, i16* %b, align 4
500  %inc = add nsw i32 %i, 1
501  %cmp2 = icmp slt i32 %i, 2096
502  br i1 %cmp2, label %for.cond, label %if.end
503
; The phi distinguishes the two exiting edges with a constant per predecessor.
504if.end:
505  %exit = phi i32 [0, %for.cond], [1, %for.body]
506  ret i32 %exit
507}
508
509; multiple exits w/distinct target blocks
; Two exiting blocks that branch to two different exit blocks: for.cond leaves
; to if.end (returns 0) when %i >= %n, and for.body leaves to if.end2
; (returns 1) when %i >= 2096.
; Expected output for the first RUN line: vectorized with VF 2, using the trip
; count umin(smax(n, 0), 2096) + 1; middle.block branches unconditionally to
; scalar.ph, so both exit blocks are reached only from the scalar loop.
; Expected output for the second RUN line (predicate-dont-vectorize): the loop
; is left scalar.
510define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
511; CHECK-LABEL: @multiple_exit_blocks(
512; CHECK-NEXT:  entry:
513; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
514; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
515; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
516; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
517; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
518; CHECK:       vector.ph:
519; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
520; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
521; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
522; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
523; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
524; CHECK:       vector.body:
525; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
526; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
527; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
528; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
529; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
530; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
531; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
532; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
533; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
534; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
535; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
536; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
537; CHECK:       middle.block:
538; CHECK-NEXT:    br label [[SCALAR_PH]]
539; CHECK:       scalar.ph:
540; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
541; CHECK-NEXT:    br label [[FOR_COND:%.*]]
542; CHECK:       for.cond:
543; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
544; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
545; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
546; CHECK:       for.body:
547; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
548; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
549; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
550; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
551; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
552; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
553; CHECK:       if.end:
554; CHECK-NEXT:    ret i32 0
555; CHECK:       if.end2:
556; CHECK-NEXT:    ret i32 1
557;
558; TAILFOLD-LABEL: @multiple_exit_blocks(
559; TAILFOLD-NEXT:  entry:
560; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
561; TAILFOLD:       for.cond:
562; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
563; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
564; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
565; TAILFOLD:       for.body:
566; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
567; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
568; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
569; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
570; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
571; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
572; TAILFOLD:       if.end:
573; TAILFOLD-NEXT:    ret i32 0
574; TAILFOLD:       if.end2:
575; TAILFOLD-NEXT:    ret i32 1
576;
577entry:
578  br label %for.cond
579
; First exit: taken from the header once %i >= %n; targets if.end.
580for.cond:
581  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
582  %cmp = icmp slt i32 %i, %n
583  br i1 %cmp, label %for.body, label %if.end
584
; Second exit: taken from the latch once %i >= 2096; targets if.end2.
585for.body:
586  %iprom = sext i32 %i to i64
587  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
588  store i16 0, i16* %b, align 4
589  %inc = add nsw i32 %i, 1
590  %cmp2 = icmp slt i32 %i, 2096
591  br i1 %cmp2, label %for.cond, label %if.end2
592
593if.end:
594  ret i32 0
595
596if.end2:
597  ret i32 1
598}
599
600; LCSSA, common value each exit
; Loop with two exiting blocks and two distinct exit blocks. for.cond leaves
; to if.end when the signed test %i < %n fails; for.body stores i16 0 to
; %p[%i] and leaves to if.end2 when the signed test %i < 2096 fails. Both
; exit blocks return the same in-loop value, %i.
; Recorded expectations: the default invocation has a vector body with a
; <2 x i16> store and a middle.block that always branches to scalar.ph; the
; predicate-dont-vectorize invocation keeps the scalar loop, with
; single-input phis for %i in the exit blocks.
601define i32 @multiple_exit_blocks2(i16* %p, i32 %n) {
602; CHECK-LABEL: @multiple_exit_blocks2(
603; CHECK-NEXT:  entry:
604; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
605; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
606; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
607; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
608; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
609; CHECK:       vector.ph:
610; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
611; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
612; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
613; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
614; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
615; CHECK:       vector.body:
616; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
617; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
618; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
619; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
620; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
621; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
622; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
623; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
624; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
625; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
626; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
627; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
628; CHECK:       middle.block:
629; CHECK-NEXT:    br label [[SCALAR_PH]]
630; CHECK:       scalar.ph:
631; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
632; CHECK-NEXT:    br label [[FOR_COND:%.*]]
633; CHECK:       for.cond:
634; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
635; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
636; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
637; CHECK:       for.body:
638; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
639; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
640; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
641; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
642; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
643; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
644; CHECK:       if.end:
645; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
646; CHECK-NEXT:    ret i32 [[I_LCSSA]]
647; CHECK:       if.end2:
648; CHECK-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
649; CHECK-NEXT:    ret i32 [[I_LCSSA1]]
650;
651; TAILFOLD-LABEL: @multiple_exit_blocks2(
652; TAILFOLD-NEXT:  entry:
653; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
654; TAILFOLD:       for.cond:
655; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
656; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
657; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
658; TAILFOLD:       for.body:
659; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
660; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
661; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
662; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
663; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
664; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
665; TAILFOLD:       if.end:
666; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
667; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
668; TAILFOLD:       if.end2:
669; TAILFOLD-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
670; TAILFOLD-NEXT:    ret i32 [[I_LCSSA1]]
671;
672entry:
673  br label %for.cond
674
; Loop header and first exiting block: leave to if.end once %i < %n is false.
675for.cond:
676  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
677  %cmp = icmp slt i32 %i, %n
678  br i1 %cmp, label %for.body, label %if.end
679
; Latch and second exiting block.
680for.body:
681  %iprom = sext i32 %i to i64
682  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
683  store i16 0, i16* %b, align 4
684  %inc = add nsw i32 %i, 1
; The second exit test reads the pre-increment %i, not %inc.
685  %cmp2 = icmp slt i32 %i, 2096
686  br i1 %cmp2, label %for.cond, label %if.end2
687
688if.end:
689  ret i32 %i
690
; Returns the same loop value as if.end.
691if.end2:
692  ret i32 %i
693}
694
695; LCSSA, distinct value each exit
; Same loop shape as a two-exit store loop over %i: for.cond leaves to if.end
; when the signed test %i < %n fails, and for.body (after storing i16 0 to
; %p[%i]) leaves to if.end2 when the signed test %i < 2096 fails. Here the
; exits return different loop values: if.end returns %i, if.end2 returns %inc.
; Recorded expectations: the default invocation has a vector body with a
; <2 x i16> store plus a <2 x i32> add of 1 on the vector induction, and a
; middle.block that always branches to scalar.ph; the
; predicate-dont-vectorize invocation keeps the scalar loop.
696define i32 @multiple_exit_blocks3(i16* %p, i32 %n) {
697; CHECK-LABEL: @multiple_exit_blocks3(
698; CHECK-NEXT:  entry:
699; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
700; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
701; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
702; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
703; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
704; CHECK:       vector.ph:
705; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
706; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
707; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
708; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
709; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
710; CHECK:       vector.body:
711; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
712; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
713; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
714; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 1
715; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
716; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
717; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
718; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
719; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
720; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <2 x i32> [[VEC_IND]], <i32 1, i32 1>
721; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
722; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
723; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
724; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
725; CHECK:       middle.block:
726; CHECK-NEXT:    br label [[SCALAR_PH]]
727; CHECK:       scalar.ph:
728; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
729; CHECK-NEXT:    br label [[FOR_COND:%.*]]
730; CHECK:       for.cond:
731; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
732; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
733; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
734; CHECK:       for.body:
735; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
736; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
737; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
738; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
739; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
740; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]]
741; CHECK:       if.end:
742; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
743; CHECK-NEXT:    ret i32 [[I_LCSSA]]
744; CHECK:       if.end2:
745; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
746; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
747;
748; TAILFOLD-LABEL: @multiple_exit_blocks3(
749; TAILFOLD-NEXT:  entry:
750; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
751; TAILFOLD:       for.cond:
752; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
753; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
754; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
755; TAILFOLD:       for.body:
756; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
757; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
758; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
759; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
760; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
761; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
762; TAILFOLD:       if.end:
763; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
764; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
765; TAILFOLD:       if.end2:
766; TAILFOLD-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
767; TAILFOLD-NEXT:    ret i32 [[INC_LCSSA]]
768;
769entry:
770  br label %for.cond
771
; Loop header and first exiting block: leave to if.end once %i < %n is false.
772for.cond:
773  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
774  %cmp = icmp slt i32 %i, %n
775  br i1 %cmp, label %for.body, label %if.end
776
; Latch and second exiting block.
777for.body:
778  %iprom = sext i32 %i to i64
779  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
780  store i16 0, i16* %b, align 4
781  %inc = add nsw i32 %i, 1
; The second exit test reads the pre-increment %i, not %inc.
782  %cmp2 = icmp slt i32 %i, 2096
783  br i1 %cmp2, label %for.cond, label %if.end2
784
; Exit taken from the header: returns the induction value itself.
785if.end:
786  ret i32 %i
787
; Exit taken from the latch: returns the incremented value instead.
788if.end2:
789  ret i32 %inc
790}
791
792; unique exit case but with a switch as two edges between the same pair of
793; blocks is an often missed edge case
; Single-block loop: for.cond stores i16 0 to %p[%i] and then switches on %i.
; The default edge is the back edge; cases 2096 and 2097 both leave to
; if.end, which returns %i. The %n argument is not used by the body.
; The recorded expectations for both invocations contain only the original
; blocks (no vector blocks), and the exit phi lists for.cond twice, once per
; switch edge.
794define i32 @multiple_exit_switch(i16* %p, i32 %n) {
795; CHECK-LABEL: @multiple_exit_switch(
796; CHECK-NEXT:  entry:
797; CHECK-NEXT:    br label [[FOR_COND:%.*]]
798; CHECK:       for.cond:
799; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
800; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
801; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
802; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
803; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
804; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
805; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
806; CHECK-NEXT:    i32 2097, label [[IF_END]]
807; CHECK-NEXT:    ]
808; CHECK:       if.end:
809; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
810; CHECK-NEXT:    ret i32 [[I_LCSSA]]
811;
812; TAILFOLD-LABEL: @multiple_exit_switch(
813; TAILFOLD-NEXT:  entry:
814; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
815; TAILFOLD:       for.cond:
816; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
817; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
818; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
819; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
820; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
821; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
822; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
823; TAILFOLD-NEXT:    i32 2097, label [[IF_END]]
824; TAILFOLD-NEXT:    ]
825; TAILFOLD:       if.end:
826; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
827; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
828;
829entry:
830  br label %for.cond
831
; Header, body and latch in one block.
832for.cond:
833  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
834  %iprom = sext i32 %i to i64
835  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
836  store i16 0, i16* %b, align 4
837  %inc = add nsw i32 %i, 1
; Default destination continues the loop; both listed cases target the same
; exit block, giving two edges between for.cond and if.end.
838  switch i32 %i, label %for.cond [
839  i32 2096, label %if.end
840  i32 2097, label %if.end
841  ]
842
843if.end:
844  ret i32 %i
845}
846
847; multiple exit case but with a switch as multiple exiting edges from
848; a single block is a commonly missed edge case
; Single-block loop: for.cond stores i16 0 to %p[%i] and then switches on %i.
; The default edge is the back edge; case 2096 leaves to if.end (returns 0)
; and case 2097 leaves to if.end2 (returns 1). The %n argument is not used by
; the body.
; The recorded expectations for both invocations contain only the original
; blocks (no vector blocks).
849define i32 @multiple_exit_switch2(i16* %p, i32 %n) {
850; CHECK-LABEL: @multiple_exit_switch2(
851; CHECK-NEXT:  entry:
852; CHECK-NEXT:    br label [[FOR_COND:%.*]]
853; CHECK:       for.cond:
854; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
855; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
856; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
857; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
858; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
859; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
860; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
861; CHECK-NEXT:    i32 2097, label [[IF_END2:%.*]]
862; CHECK-NEXT:    ]
863; CHECK:       if.end:
864; CHECK-NEXT:    ret i32 0
865; CHECK:       if.end2:
866; CHECK-NEXT:    ret i32 1
867;
868; TAILFOLD-LABEL: @multiple_exit_switch2(
869; TAILFOLD-NEXT:  entry:
870; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
871; TAILFOLD:       for.cond:
872; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
873; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
874; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
875; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
876; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
877; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
878; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
879; TAILFOLD-NEXT:    i32 2097, label [[IF_END2:%.*]]
880; TAILFOLD-NEXT:    ]
881; TAILFOLD:       if.end:
882; TAILFOLD-NEXT:    ret i32 0
883; TAILFOLD:       if.end2:
884; TAILFOLD-NEXT:    ret i32 1
885;
886entry:
887  br label %for.cond
888
; Header, body and latch in one block.
889for.cond:
890  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
891  %iprom = sext i32 %i to i64
892  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
893  store i16 0, i16* %b, align 4
894  %inc = add nsw i32 %i, 1
; Default destination continues the loop; each listed case targets a
; different exit block.
895  switch i32 %i, label %for.cond [
896  i32 2096, label %if.end
897  i32 2097, label %if.end2
898  ]
899
900if.end:
901  ret i32 0
902
903if.end2:
904  ret i32 1
905}
906
; Loop whose back edge goes through a dedicated block, for.body.backedge,
; which is reached from both for.body (when %inc < 16) and for.second (when
; %inc > 16). The only loop exit is from for.second to for.end, and the store
; of i16 0 to %p[%i.02] happens only in for.second.
; The recorded expectations for both invocations contain only the original
; blocks (no vector blocks).
907define i32 @multiple_latch1(i16* %p) {
908; CHECK-LABEL: @multiple_latch1(
909; CHECK-NEXT:  entry:
910; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
911; CHECK:       for.body:
912; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
913; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
914; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
915; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
916; CHECK:       for.second:
917; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
918; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
919; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
920; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
921; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
922; CHECK:       for.body.backedge:
923; CHECK-NEXT:    br label [[FOR_BODY]]
924; CHECK:       for.end:
925; CHECK-NEXT:    ret i32 0
926;
927; TAILFOLD-LABEL: @multiple_latch1(
928; TAILFOLD-NEXT:  entry:
929; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
930; TAILFOLD:       for.body:
931; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
932; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
933; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
934; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
935; TAILFOLD:       for.second:
936; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
937; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
938; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
939; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
940; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
941; TAILFOLD:       for.body.backedge:
942; TAILFOLD-NEXT:    br label [[FOR_BODY]]
943; TAILFOLD:       for.end:
944; TAILFOLD-NEXT:    ret i32 0
945;
946entry:
947  br label %for.body
948
; Loop header: continue via the shared backedge block while %inc < 16.
949for.body:
950  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge]
951  %inc = add nsw i32 %i.02, 1
952  %cmp = icmp slt i32 %inc, 16
953  br i1 %cmp, label %for.body.backedge, label %for.second
954
; Reached only when %inc >= 16; this is the sole exiting block.
955for.second:
956  %iprom = sext i32 %i.02 to i64
957  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
958  store i16 0, i16* %b, align 4
959  %cmps = icmp sgt i32 %inc, 16
960  br i1 %cmps, label %for.body.backedge, label %for.end
961
; Single block carrying the back edge to the header.
962for.body.backedge:
963  br label %for.body
964
965for.end:
966  ret i32 0
967}
968
969
970; two back branches - loop simplify will convert this to the same form
971; as previous before vectorizer sees it, but show that.
; In the input, for.body is targeted by two back edges, one from itself
; (when %inc < 16) and one from for.second (when %inc > 16). The recorded
; expectations for both invocations show those edges routed through a
; for.body.backedge block, and contain no vector blocks.
972define i32 @multiple_latch2(i16* %p) {
973; CHECK-LABEL: @multiple_latch2(
974; CHECK-NEXT:  entry:
975; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
976; CHECK:       for.body:
977; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
978; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
979; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
980; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
981; CHECK:       for.body.backedge:
982; CHECK-NEXT:    br label [[FOR_BODY]]
983; CHECK:       for.second:
984; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
985; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
986; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
987; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
988; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
989; CHECK:       for.end:
990; CHECK-NEXT:    ret i32 0
991;
992; TAILFOLD-LABEL: @multiple_latch2(
993; TAILFOLD-NEXT:  entry:
994; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
995; TAILFOLD:       for.body:
996; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
997; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
998; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
999; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
1000; TAILFOLD:       for.body.backedge:
1001; TAILFOLD-NEXT:    br label [[FOR_BODY]]
1002; TAILFOLD:       for.second:
1003; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
1004; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
1005; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
1006; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
1007; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
1008; TAILFOLD:       for.end:
1009; TAILFOLD-NEXT:    ret i32 0
1010;
1011entry:
1012  br label %for.body
1013
; Loop header with three phi inputs: the preheader plus both back edges.
1014for.body:
1015  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
1016  %inc = add nsw i32 %i.02, 1
1017  %cmp = icmp slt i32 %inc, 16
1018  br i1 %cmp, label %for.body, label %for.second
1019
; Reached only when %inc >= 16; this is the sole exiting block and also the
; source of the second back edge.
1020for.second:
1021  %iprom = sext i32 %i.02 to i64
1022  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
1023  store i16 0, i16* %b, align 4
1024  %cmps = icmp sgt i32 %inc, 16
1025  br i1 %cmps, label %for.body, label %for.end
1026
1027for.end:
1028  ret i32 0
1029}
1030
1031
1032; Check interaction between block predication and early exits.  We need the
1033; condition on the early exit to remain dead (i.e. not be used when forming
1034; the predicate mask).
; The loop exits only from loop.header (when %iv == 200); loop.latch branches
; back unconditionally. The store of 10.0 in %then runs only when the fcmp oeq
; of the loaded value against 0.0 is false.
; Recorded expectations: in the default invocation the per-lane branch
; conditions for the predicated stores are extracted from the xor of the
; vector fcmp with all-true, and the vector loop ends when the index reaches
; 200; the predicate-dont-vectorize invocation keeps the scalar loop.
1035define void @scalar_predication(float* %addr) {
1036; CHECK-LABEL: @scalar_predication(
1037; CHECK-NEXT:  entry:
1038; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1039; CHECK:       vector.ph:
1040; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1041; CHECK:       vector.body:
1042; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
1043; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
1044; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1045; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]]
1046; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
1047; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
1048; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
1049; CHECK-NEXT:    [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
1050; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
1051; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
1052; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
1053; CHECK:       pred.store.if:
1054; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP0]]
1055; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP7]], align 4
1056; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
1057; CHECK:       pred.store.continue:
1058; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
1059; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
1060; CHECK:       pred.store.if1:
1061; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 1
1062; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP9]]
1063; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP10]], align 4
1064; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
1065; CHECK:       pred.store.continue2:
1066; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1067; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1068; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
1069; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
1070; CHECK:       middle.block:
1071; CHECK-NEXT:    br label [[SCALAR_PH]]
1072; CHECK:       scalar.ph:
1073; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1074; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1075; CHECK:       loop.header:
1076; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1077; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]]
1078; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1079; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
1080; CHECK:       loop.body:
1081; CHECK-NEXT:    [[TMP12:%.*]] = load float, float* [[GEP]], align 4
1082; CHECK-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP12]], 0.000000e+00
1083; CHECK-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1084; CHECK:       then:
1085; CHECK-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1086; CHECK-NEXT:    br label [[LOOP_LATCH]]
1087; CHECK:       loop.latch:
1088; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1089; CHECK-NEXT:    br label [[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]]
1090; CHECK:       exit:
1091; CHECK-NEXT:    ret void
1092;
1093; TAILFOLD-LABEL: @scalar_predication(
1094; TAILFOLD-NEXT:  entry:
1095; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1096; TAILFOLD:       loop.header:
1097; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1098; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]]
1099; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1100; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
1101; TAILFOLD:       loop.body:
1102; TAILFOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[GEP]], align 4
1103; TAILFOLD-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
1104; TAILFOLD-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1105; TAILFOLD:       then:
1106; TAILFOLD-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1107; TAILFOLD-NEXT:    br label [[LOOP_LATCH]]
1108; TAILFOLD:       loop.latch:
1109; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1110; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1111; TAILFOLD:       exit:
1112; TAILFOLD-NEXT:    ret void
1113;
1114entry:
1115  br label %loop.header
1116
; Header holds the only exit test (the "early exit" relative to the latch).
1117loop.header:
1118  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1119  %gep = getelementptr float, float* %addr, i64 %iv
1120  %exitcond.not = icmp eq i64 %iv, 200
1121  br i1 %exitcond.not, label %exit, label %loop.body
1122
; Skip the store when the loaded value compares ordered-equal to 0.0.
1123loop.body:
1124  %0 = load float, float* %gep, align 4
1125  %pred = fcmp oeq float %0, 0.0
1126  br i1 %pred, label %loop.latch, label %then
1127
; Conditionally executed block that makes the store predicated.
1128then:
1129  store float 10.0, float* %gep, align 4
1130  br label %loop.latch
1131
; The latch has no exit test of its own.
1132loop.latch:
1133  %iv.next = add nuw nsw i64 %iv, 1
1134  br label %loop.header
1135
1136exit:
1137  ret void
1138}
1139
; Integer add reduction (%accum) with two exiting blocks that share one exit
; block. loop.header leaves when %iv == 200 and contributes the constant 0 to
; the exit phi; loop.latch leaves when %iv == 400 and contributes
; %accum.next.
; Recorded expectations: the default invocation has a vector body with a
; <2 x i32> accumulator phi and a call to llvm.vector.reduce.add.v2i32 in
; middle.block, which then always branches to scalar.ph; the
; predicate-dont-vectorize invocation keeps the scalar loop.
1140define i32 @me_reduction(i32* %addr) {
1141; CHECK-LABEL: @me_reduction(
1142; CHECK-NEXT:  entry:
1143; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1144; CHECK:       vector.ph:
1145; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1146; CHECK:       vector.body:
1147; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1148; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1149; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
1150; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1151; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]]
1152; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0
1153; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
1154; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
1155; CHECK-NEXT:    [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
1156; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1157; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1158; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
1159; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1160; CHECK:       middle.block:
1161; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP5]])
1162; CHECK-NEXT:    br label [[SCALAR_PH]]
1163; CHECK:       scalar.ph:
1164; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1165; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1166; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1167; CHECK:       loop.header:
1168; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1169; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1170; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]]
1171; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1172; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1173; CHECK:       loop.latch:
1174; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4
1175; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]]
1176; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1177; CHECK-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1178; CHECK-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]]
1179; CHECK:       exit:
1180; CHECK-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1181; CHECK-NEXT:    ret i32 [[LCSSA]]
1182;
1183; TAILFOLD-LABEL: @me_reduction(
1184; TAILFOLD-NEXT:  entry:
1185; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1186; TAILFOLD:       loop.header:
1187; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1188; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1189; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1190; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1191; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1192; TAILFOLD:       loop.latch:
1193; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1194; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1195; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1196; TAILFOLD-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1197; TAILFOLD-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]]
1198; TAILFOLD:       exit:
1199; TAILFOLD-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1200; TAILFOLD-NEXT:    ret i32 [[LCSSA]]
1201;
1202entry:
1203  br label %loop.header
1204
; First exiting block: tests %iv against 200 before the load.
1205loop.header:
1206  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1207  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1208  %gep = getelementptr i32, i32* %addr, i64 %iv
1209  %exitcond.not = icmp eq i64 %iv, 200
1210  br i1 %exitcond.not, label %exit, label %loop.latch
1211
; Second exiting block: accumulates the loaded value, then tests the
; pre-increment %iv against 400.
1212loop.latch:
1213  %0 = load i32, i32* %gep, align 4
1214  %accum.next = add i32 %accum, %0
1215  %iv.next = add nuw nsw i64 %iv, 1
1216  %exitcond2.not = icmp eq i64 %iv, 400
1217  br i1 %exitcond2.not, label %exit, label %loop.header
1218
; The returned value depends on which exiting block was taken.
1219exit:
1220  %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch]
1221  ret i32 %lcssa
1222}
1223
1224; TODO: The current definition of reduction is too strict, we can vectorize
1225; this.  There's an analogous single exit case where we extract the N-1
1226; value of the reduction that we can also handle.  If we fix the latter, the
1227; multiple exit case probably falls out.
; Here the only exit is from loop.header (when %iv == 200), loop.latch
; branches back unconditionally, and the returned value is the header phi
; %accum rather than %accum.next. The recorded expectations for both
; invocations contain only the original blocks (no vector blocks).
1228define i32 @me_reduction2(i32* %addr) {
1229; CHECK-LABEL: @me_reduction2(
1230; CHECK-NEXT:  entry:
1231; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1232; CHECK:       loop.header:
1233; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1234; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1235; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1236; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1237; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1238; CHECK:       loop.latch:
1239; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1240; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1241; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1242; CHECK-NEXT:    br label [[LOOP_HEADER]]
1243; CHECK:       exit:
1244; CHECK-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1245; CHECK-NEXT:    ret i32 [[ACCUM_LCSSA]]
1246;
1247; TAILFOLD-LABEL: @me_reduction2(
1248; TAILFOLD-NEXT:  entry:
1249; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1250; TAILFOLD:       loop.header:
1251; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1252; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1253; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1254; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1255; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1256; TAILFOLD:       loop.latch:
1257; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1258; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1259; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1260; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1261; TAILFOLD:       exit:
1262; TAILFOLD-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1263; TAILFOLD-NEXT:    ret i32 [[ACCUM_LCSSA]]
1264;
1265entry:
1266  br label %loop.header
1267
; Sole exiting block: tests %iv against 200 before the load.
1268loop.header:
1269  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1270  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1271  %gep = getelementptr i32, i32* %addr, i64 %iv
1272  %exitcond.not = icmp eq i64 %iv, 200
1273  br i1 %exitcond.not, label %exit, label %loop.latch
1274
; Accumulates the loaded value; no exit test here.
1275loop.latch:
1276  %0 = load i32, i32* %gep, align 4
1277  %accum.next = add i32 %accum, %0
1278  %iv.next = add nuw nsw i64 %iv, 1
1279  br label %loop.header
1280
; Uses the reduction phi itself outside the loop, not the updated value.
1281exit:
1282  ret i32 %accum
1283}
1284
1285