1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
3; RUN: opt -S -loop-vectorize -force-vector-width=2  -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s
4
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6
; Bottom-tested loop: for.cond is the only loop block and the store precedes
; the exit branch, so the body runs at least once.
; * Default run: vectorized with <2 x i16> stores; n.vec rounds the trip count
;   down to a multiple of 2 and middle.block skips the scalar loop when the
;   trip count equals n.vec.
; * Tail-folding run: each lane's store is guarded by
;   `vec.ind ule (trip count - 1)` and middle.block branches straight to
;   if.end (`br i1 true`).
7define void @bottom_tested(i16* %p, i32 %n) {
8; CHECK-LABEL: @bottom_tested(
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
11; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
12; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
13; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
14; CHECK:       vector.ph:
15; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
16; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
17; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
18; CHECK:       vector.body:
19; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
20; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
21; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
22; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]]
23; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
24; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>*
25; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4
26; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
27; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
28; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
29; CHECK:       middle.block:
30; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
31; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
32; CHECK:       scalar.ph:
33; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
34; CHECK-NEXT:    br label [[FOR_COND:%.*]]
35; CHECK:       for.cond:
36; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
37; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
38; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
39; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
40; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
41; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
42; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
43; CHECK:       if.end:
44; CHECK-NEXT:    ret void
45;
46; TAILFOLD-LABEL: @bottom_tested(
47; TAILFOLD-NEXT:  entry:
48; TAILFOLD-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
49; TAILFOLD-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
50; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
51; TAILFOLD:       vector.ph:
52; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1
53; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
54; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
55; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1
56; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
57; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
58; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
59; TAILFOLD:       vector.body:
60; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
61; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
62; TAILFOLD-NEXT:    [[TMP3:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
63; TAILFOLD-NEXT:    [[TMP4:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
64; TAILFOLD-NEXT:    [[TMP5:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
65; TAILFOLD-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
66; TAILFOLD:       pred.store.if:
67; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i64> [[TMP4]], i32 0
68; TAILFOLD-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]]
69; TAILFOLD-NEXT:    store i16 0, i16* [[TMP7]], align 4
70; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
71; TAILFOLD:       pred.store.continue:
72; TAILFOLD-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
73; TAILFOLD-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
74; TAILFOLD:       pred.store.if1:
75; TAILFOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x i64> [[TMP4]], i32 1
76; TAILFOLD-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP9]]
77; TAILFOLD-NEXT:    store i16 0, i16* [[TMP10]], align 4
78; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
79; TAILFOLD:       pred.store.continue2:
80; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
81; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
82; TAILFOLD-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
83; TAILFOLD-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
84; TAILFOLD:       middle.block:
85; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
86; TAILFOLD:       scalar.ph:
87; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
88; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
89; TAILFOLD:       for.cond:
90; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
91; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
92; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
93; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
94; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
95; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
96; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
97; TAILFOLD:       if.end:
98; TAILFOLD-NEXT:    ret void
99;
100entry:
101  br label %for.cond
102
103for.cond:
104  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
105  %iprom = sext i32 %i to i64
106  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
107  store i16 0, i16* %b, align 4
108  %inc = add nsw i32 %i, 1
  ; The exit test uses the pre-increment %i, so indices 0..max(%n, 0) are
  ; stored; this matches the smax(%n, 0) + 1 count in the expected output.
109  %cmp = icmp slt i32 %i, %n
110  br i1 %cmp, label %for.cond, label %if.end
111
112if.end:
113  ret void
114}
115
; Top-tested loop: for.cond checks %i < %n before for.body performs the store.
; * Default run: vectorized with <2 x i16> stores. n.vec subtracts 2 (not 0)
;   when the remainder is zero and middle.block branches unconditionally to
;   scalar.ph, so the final iterations are always left to the scalar loop.
; * Tail-folding run: the expected output is the unmodified scalar loop
;   (presumably tail folding is not applicable when the loop exits from its
;   header -- confirm against the vectorizer's legality checks).
116define void @early_exit(i16* %p, i32 %n) {
117; CHECK-LABEL: @early_exit(
118; CHECK-NEXT:  entry:
119; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
120; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
121; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
122; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
123; CHECK:       vector.ph:
124; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
125; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
126; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
127; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
128; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
129; CHECK:       vector.body:
130; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
131; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
132; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
133; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
134; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
135; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
136; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
137; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
138; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
139; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
140; CHECK:       middle.block:
141; CHECK-NEXT:    br label [[SCALAR_PH]]
142; CHECK:       scalar.ph:
143; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
144; CHECK-NEXT:    br label [[FOR_COND:%.*]]
145; CHECK:       for.cond:
146; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
147; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
148; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
149; CHECK:       for.body:
150; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
151; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
152; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
153; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
154; CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
155; CHECK:       if.end:
156; CHECK-NEXT:    ret void
157;
158; TAILFOLD-LABEL: @early_exit(
159; TAILFOLD-NEXT:  entry:
160; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
161; TAILFOLD:       for.cond:
162; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
163; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
164; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
165; TAILFOLD:       for.body:
166; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
167; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
168; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
169; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
170; TAILFOLD-NEXT:    br label [[FOR_COND]]
171; TAILFOLD:       if.end:
172; TAILFOLD-NEXT:    ret void
173;
174entry:
175  br label %for.cond
176
177for.cond:
  ; Loop header and only exiting block: the test runs before the store.
178  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
179  %cmp = icmp slt i32 %i, %n
180  br i1 %cmp, label %for.body, label %if.end
181
182for.body:
183  %iprom = sext i32 %i to i64
184  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
185  store i16 0, i16* %b, align 4
186  %inc = add nsw i32 %i, 1
187  br label %for.cond
188
189if.end:
190  ret void
191}
192
193; Same loop as early_exit, but the function is marked optsize to prevent the
194; use of a scalar epilogue. Neither run can vectorize this loop.
; Both sets of expected output below are the unmodified scalar loop.
195define void @optsize(i16* %p, i32 %n) optsize {
196; CHECK-LABEL: @optsize(
197; CHECK-NEXT:  entry:
198; CHECK-NEXT:    br label [[FOR_COND:%.*]]
199; CHECK:       for.cond:
200; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
201; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
202; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
203; CHECK:       for.body:
204; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
205; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
206; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
207; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
208; CHECK-NEXT:    br label [[FOR_COND]]
209; CHECK:       if.end:
210; CHECK-NEXT:    ret void
211;
212; TAILFOLD-LABEL: @optsize(
213; TAILFOLD-NEXT:  entry:
214; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
215; TAILFOLD:       for.cond:
216; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
217; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
218; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
219; TAILFOLD:       for.body:
220; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
221; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
222; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
223; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
224; TAILFOLD-NEXT:    br label [[FOR_COND]]
225; TAILFOLD:       if.end:
226; TAILFOLD-NEXT:    ret void
227;
228entry:
229  br label %for.cond
230
231for.cond:
232  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
233  %cmp = icmp slt i32 %i, %n
234  br i1 %cmp, label %for.body, label %if.end
235
236for.body:
237  %iprom = sext i32 %i to i64
238  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
239  store i16 0, i16* %b, align 4
240  %inc = add nsw i32 %i, 1
241  br label %for.cond
242
243if.end:
244  ret void
245}
246
247
248; Multiple exiting blocks (for.cond and for.body) that share the single exit
; block if.end; no value defined inside the loop is used outside it.
; * Default run: vectorized with <2 x i16> stores; the count fed to the
;   min-iters check is umin(smax(%n, 0), 2096) + 1, and middle.block always
;   branches to scalar.ph.
; * Tail-folding run: the expected output is the unmodified scalar loop.
249define void @multiple_unique_exit(i16* %p, i32 %n) {
250; CHECK-LABEL: @multiple_unique_exit(
251; CHECK-NEXT:  entry:
252; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
253; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
254; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
255; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
256; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
257; CHECK:       vector.ph:
258; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
259; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
260; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
261; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
262; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
263; CHECK:       vector.body:
264; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
265; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
266; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
267; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
268; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
269; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
270; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
271; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
272; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
273; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
274; CHECK:       middle.block:
275; CHECK-NEXT:    br label [[SCALAR_PH]]
276; CHECK:       scalar.ph:
277; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
278; CHECK-NEXT:    br label [[FOR_COND:%.*]]
279; CHECK:       for.cond:
280; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
281; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
282; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
283; CHECK:       for.body:
284; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
285; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
286; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
287; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
288; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
289; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP7:![0-9]+]]
290; CHECK:       if.end:
291; CHECK-NEXT:    ret void
292;
293; TAILFOLD-LABEL: @multiple_unique_exit(
294; TAILFOLD-NEXT:  entry:
295; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
296; TAILFOLD:       for.cond:
297; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
298; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
299; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
300; TAILFOLD:       for.body:
301; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
302; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
303; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
304; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
305; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
306; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
307; TAILFOLD:       if.end:
308; TAILFOLD-NEXT:    ret void
309;
310entry:
311  br label %for.cond
312
313for.cond:
  ; First exit: taken when %i is no longer less than %n.
314  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
315  %cmp = icmp slt i32 %i, %n
316  br i1 %cmp, label %for.body, label %if.end
317
318for.body:
319  %iprom = sext i32 %i to i64
320  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
321  store i16 0, i16* %b, align 4
322  %inc = add nsw i32 %i, 1
  ; Second exit: taken after the store once the pre-increment %i reaches 2096.
323  %cmp2 = icmp slt i32 %i, 2096
324  br i1 %cmp2, label %for.cond, label %if.end
325
326if.end:
327  ret void
328}
329
330; Multiple exiting blocks sharing one exit block, with an LCSSA phi: the
; function returns %i, and the expected output carries it through a phi in
; if.end whose incoming value is %i from both exiting blocks.
; * Default run: vectorized like multiple_unique_exit; the phi stays in the
;   scalar loop's exit block.
; * Tail-folding run: the expected output is the scalar loop plus that phi.
331define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
332; CHECK-LABEL: @multiple_unique_exit2(
333; CHECK-NEXT:  entry:
334; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
335; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
336; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
337; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
338; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
339; CHECK:       vector.ph:
340; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
341; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
342; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
343; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
344; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
345; CHECK:       vector.body:
346; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
347; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
348; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
349; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
350; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
351; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
352; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
353; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
354; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
355; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
356; CHECK:       middle.block:
357; CHECK-NEXT:    br label [[SCALAR_PH]]
358; CHECK:       scalar.ph:
359; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
360; CHECK-NEXT:    br label [[FOR_COND:%.*]]
361; CHECK:       for.cond:
362; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
363; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
364; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
365; CHECK:       for.body:
366; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
367; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
368; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
369; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
370; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
371; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
372; CHECK:       if.end:
373; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
374; CHECK-NEXT:    ret i32 [[I_LCSSA]]
375;
376; TAILFOLD-LABEL: @multiple_unique_exit2(
377; TAILFOLD-NEXT:  entry:
378; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
379; TAILFOLD:       for.cond:
380; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
381; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
382; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
383; TAILFOLD:       for.body:
384; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
385; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
386; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
387; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
388; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
389; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
390; TAILFOLD:       if.end:
391; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
392; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
393;
394entry:
395  br label %for.cond
396
397for.cond:
398  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
399  %cmp = icmp slt i32 %i, %n
400  br i1 %cmp, label %for.body, label %if.end
401
402for.body:
403  %iprom = sext i32 %i to i64
404  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
405  store i16 0, i16* %b, align 4
406  %inc = add nsw i32 %i, 1
407  %cmp2 = icmp slt i32 %i, 2096
408  br i1 %cmp2, label %for.cond, label %if.end
409
410if.end:
  ; Direct use of the loop-defined %i outside the loop; the expected output
  ; shows it rewritten through an LCSSA phi.
411  ret i32 %i
412}
413
414; Multiple exiting blocks sharing one exit block, with a non-LCSSA phi: the
; phi in if.end merges the constants 0 (exit from for.cond) and 1 (exit from
; for.body), recording which exit was taken.
; * Default run: vectorized like multiple_unique_exit; the phi is unchanged
;   in the scalar loop's exit block.
; * Tail-folding run: the expected output is the unmodified scalar loop.
415define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
416; CHECK-LABEL: @multiple_unique_exit3(
417; CHECK-NEXT:  entry:
418; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
419; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
420; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
421; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
422; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
423; CHECK:       vector.ph:
424; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
425; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
426; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
427; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
428; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
429; CHECK:       vector.body:
430; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
431; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
432; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
433; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
434; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
435; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
436; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
437; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
438; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
439; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
440; CHECK:       middle.block:
441; CHECK-NEXT:    br label [[SCALAR_PH]]
442; CHECK:       scalar.ph:
443; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
444; CHECK-NEXT:    br label [[FOR_COND:%.*]]
445; CHECK:       for.cond:
446; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
447; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
448; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
449; CHECK:       for.body:
450; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
451; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
452; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
453; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
454; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
455; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]]
456; CHECK:       if.end:
457; CHECK-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
458; CHECK-NEXT:    ret i32 [[EXIT]]
459;
460; TAILFOLD-LABEL: @multiple_unique_exit3(
461; TAILFOLD-NEXT:  entry:
462; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
463; TAILFOLD:       for.cond:
464; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
465; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
466; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
467; TAILFOLD:       for.body:
468; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
469; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
470; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
471; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
472; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
473; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
474; TAILFOLD:       if.end:
475; TAILFOLD-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
476; TAILFOLD-NEXT:    ret i32 [[EXIT]]
477;
478entry:
479  br label %for.cond
480
481for.cond:
482  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
483  %cmp = icmp slt i32 %i, %n
484  br i1 %cmp, label %for.body, label %if.end
485
486for.body:
487  %iprom = sext i32 %i to i64
488  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
489  store i16 0, i16* %b, align 4
490  %inc = add nsw i32 %i, 1
491  %cmp2 = icmp slt i32 %i, 2096
492  br i1 %cmp2, label %for.cond, label %if.end
493
494if.end:
  ; Incoming values differ per predecessor, so this is not an LCSSA-style phi.
495  %exit = phi i32 [0, %for.cond], [1, %for.body]
496  ret i32 %exit
497}
498
499; Multiple exits with distinct target blocks: for.cond exits to if.end
; (returns 0) and for.body exits to if.end2 (returns 1).
; * Default run: vectorized with <2 x i16> stores; middle.block always
;   branches to scalar.ph and both exit blocks are reached only from the
;   scalar loop.
; * Tail-folding run: the expected output is the unmodified scalar loop.
500define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
501; CHECK-LABEL: @multiple_exit_blocks(
502; CHECK-NEXT:  entry:
503; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
504; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
505; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
506; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
507; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
508; CHECK:       vector.ph:
509; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
510; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
511; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
512; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
513; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
514; CHECK:       vector.body:
515; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
516; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
517; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
518; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
519; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
520; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
521; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
522; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
523; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
524; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
525; CHECK:       middle.block:
526; CHECK-NEXT:    br label [[SCALAR_PH]]
527; CHECK:       scalar.ph:
528; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
529; CHECK-NEXT:    br label [[FOR_COND:%.*]]
530; CHECK:       for.cond:
531; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
532; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
533; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
534; CHECK:       for.body:
535; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
536; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
537; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
538; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
539; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
540; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP13:![0-9]+]]
541; CHECK:       if.end:
542; CHECK-NEXT:    ret i32 0
543; CHECK:       if.end2:
544; CHECK-NEXT:    ret i32 1
545;
546; TAILFOLD-LABEL: @multiple_exit_blocks(
547; TAILFOLD-NEXT:  entry:
548; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
549; TAILFOLD:       for.cond:
550; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
551; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
552; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
553; TAILFOLD:       for.body:
554; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
555; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
556; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
557; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
558; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
559; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
560; TAILFOLD:       if.end:
561; TAILFOLD-NEXT:    ret i32 0
562; TAILFOLD:       if.end2:
563; TAILFOLD-NEXT:    ret i32 1
564;
565entry:
566  br label %for.cond
567
568for.cond:
569  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
570  %cmp = icmp slt i32 %i, %n
571  br i1 %cmp, label %for.body, label %if.end
572
573for.body:
574  %iprom = sext i32 %i to i64
575  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
576  store i16 0, i16* %b, align 4
577  %inc = add nsw i32 %i, 1
578  %cmp2 = icmp slt i32 %i, 2096
579  br i1 %cmp2, label %for.cond, label %if.end2
580
581if.end:
  ; Reached only from the header exit in for.cond.
582  ret i32 0
583
584if.end2:
  ; Reached only from the exit in for.body.
585  ret i32 1
586}
587
588; LCSSA, common value each exit
589define i32 @multiple_exit_blocks2(i16* %p, i32 %n) {
590; CHECK-LABEL: @multiple_exit_blocks2(
591; CHECK-NEXT:  entry:
592; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
593; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
594; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
595; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
596; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
597; CHECK:       vector.ph:
598; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
599; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
600; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
601; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
602; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
603; CHECK:       vector.body:
604; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
605; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
606; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
607; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
608; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
609; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
610; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
611; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
612; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
613; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
614; CHECK:       middle.block:
615; CHECK-NEXT:    br label [[SCALAR_PH]]
616; CHECK:       scalar.ph:
617; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
618; CHECK-NEXT:    br label [[FOR_COND:%.*]]
619; CHECK:       for.cond:
620; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
621; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
622; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
623; CHECK:       for.body:
624; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
625; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
626; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
627; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
628; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
629; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
630; CHECK:       if.end:
631; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
632; CHECK-NEXT:    ret i32 [[I_LCSSA]]
633; CHECK:       if.end2:
634; CHECK-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
635; CHECK-NEXT:    ret i32 [[I_LCSSA1]]
636;
637; TAILFOLD-LABEL: @multiple_exit_blocks2(
638; TAILFOLD-NEXT:  entry:
639; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
640; TAILFOLD:       for.cond:
641; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
642; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
643; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
644; TAILFOLD:       for.body:
645; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
646; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
647; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
648; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
649; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
650; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
651; TAILFOLD:       if.end:
652; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
653; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
654; TAILFOLD:       if.end2:
655; TAILFOLD-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
656; TAILFOLD-NEXT:    ret i32 [[I_LCSSA1]]
657;
658entry:
659  br label %for.cond
660
661for.cond:
662  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
663  %cmp = icmp slt i32 %i, %n
664  br i1 %cmp, label %for.body, label %if.end
665
666for.body:
667  %iprom = sext i32 %i to i64
668  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
669  store i16 0, i16* %b, align 4
670  %inc = add nsw i32 %i, 1
671  %cmp2 = icmp slt i32 %i, 2096
672  br i1 %cmp2, label %for.cond, label %if.end2
673
674if.end:
675  ret i32 %i
676
677if.end2:
678  ret i32 %i
679}
680
681; LCSSA, distinct value each exit: the two exit blocks return different loop
; values (if.end returns %i, if.end2 returns %inc), so the expected output has
; a separate LCSSA phi in each exit block.
; CHECK (-force-vector-width=2) expects a vector body storing <2 x i16> whose
; middle.block branches unconditionally to scalar.ph.  TAILFOLD expects the
; scalar loop with no vector body.
682define i32 @multiple_exit_blocks3(i16* %p, i32 %n) {
683; CHECK-LABEL: @multiple_exit_blocks3(
684; CHECK-NEXT:  entry:
685; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
686; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
687; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
688; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
689; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
690; CHECK:       vector.ph:
691; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
692; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
693; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
694; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
695; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
696; CHECK:       vector.body:
697; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
698; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
699; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
700; CHECK-NEXT:    [[TMP5:%.*]] = sext i32 [[TMP3]] to i64
701; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP5]]
702; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
703; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <2 x i16>*
704; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP8]], align 4
705; CHECK-NEXT:    [[TMP9:%.*]] = add nsw <2 x i32> [[VEC_IND]], <i32 1, i32 1>
706; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
707; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
708; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
709; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
710; CHECK:       middle.block:
711; CHECK-NEXT:    br label [[SCALAR_PH]]
712; CHECK:       scalar.ph:
713; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
714; CHECK-NEXT:    br label [[FOR_COND:%.*]]
715; CHECK:       for.cond:
716; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
717; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
718; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
719; CHECK:       for.body:
720; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
721; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
722; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
723; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
724; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
725; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]]
726; CHECK:       if.end:
727; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
728; CHECK-NEXT:    ret i32 [[I_LCSSA]]
729; CHECK:       if.end2:
730; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
731; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
732;
733; TAILFOLD-LABEL: @multiple_exit_blocks3(
734; TAILFOLD-NEXT:  entry:
735; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
736; TAILFOLD:       for.cond:
737; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
738; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
739; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
740; TAILFOLD:       for.body:
741; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
742; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
743; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
744; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
745; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
746; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
747; TAILFOLD:       if.end:
748; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
749; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
750; TAILFOLD:       if.end2:
751; TAILFOLD-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
752; TAILFOLD-NEXT:    ret i32 [[INC_LCSSA]]
753;
754entry:
755  br label %for.cond
756
757for.cond:
758  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
759  %cmp = icmp slt i32 %i, %n
  ; First exit: leave through %if.end when %i is not (signed) less than %n.
760  br i1 %cmp, label %for.body, label %if.end
761
762for.body:
763  %iprom = sext i32 %i to i64
764  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
765  store i16 0, i16* %b, align 4
766  %inc = add nsw i32 %i, 1
767  %cmp2 = icmp slt i32 %i, 2096
  ; Second exit: leave through %if.end2 when %i is not less than 2096.
768  br i1 %cmp2, label %for.cond, label %if.end2
769
770if.end:
  ; This exit returns the pre-increment induction value.
771  ret i32 %i
772
773if.end2:
  ; This exit returns the incremented value, unlike %if.end.
774  ret i32 %inc
775}
776
777; Unique exit block, but reached through a switch: two edges between the same
778; pair of blocks (for.cond -> if.end) is an often-missed edge case.
; Both the CHECK and TAILFOLD lines expect the scalar loop with no vector body.
779define i32 @multiple_exit_switch(i16* %p, i32 %n) {
780; CHECK-LABEL: @multiple_exit_switch(
781; CHECK-NEXT:  entry:
782; CHECK-NEXT:    br label [[FOR_COND:%.*]]
783; CHECK:       for.cond:
784; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
785; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
786; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
787; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
788; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
789; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
790; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
791; CHECK-NEXT:    i32 2097, label [[IF_END]]
792; CHECK-NEXT:    ]
793; CHECK:       if.end:
794; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
795; CHECK-NEXT:    ret i32 [[I_LCSSA]]
796;
797; TAILFOLD-LABEL: @multiple_exit_switch(
798; TAILFOLD-NEXT:  entry:
799; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
800; TAILFOLD:       for.cond:
801; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
802; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
803; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
804; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
805; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
806; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
807; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
808; TAILFOLD-NEXT:    i32 2097, label [[IF_END]]
809; TAILFOLD-NEXT:    ]
810; TAILFOLD:       if.end:
811; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
812; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
813;
814entry:
815  br label %for.cond
816
817for.cond:
818  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
819  %iprom = sext i32 %i to i64
820  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
821  store i16 0, i16* %b, align 4
822  %inc = add nsw i32 %i, 1
  ; The default destination is the loop itself; both listed cases target the
  ; same exit block, giving two distinct edges from %for.cond to %if.end.
823  switch i32 %i, label %for.cond [
824  i32 2096, label %if.end
825  i32 2097, label %if.end
826  ]
827
828if.end:
829  ret i32 %i
830}
831
832; Multiple exit blocks, reached through a switch: several exiting edges
833; leaving a single block is a commonly missed edge case.
; Both the CHECK and TAILFOLD lines expect the scalar loop with no vector body.
834define i32 @multiple_exit_switch2(i16* %p, i32 %n) {
835; CHECK-LABEL: @multiple_exit_switch2(
836; CHECK-NEXT:  entry:
837; CHECK-NEXT:    br label [[FOR_COND:%.*]]
838; CHECK:       for.cond:
839; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
840; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
841; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
842; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
843; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
844; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
845; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
846; CHECK-NEXT:    i32 2097, label [[IF_END2:%.*]]
847; CHECK-NEXT:    ]
848; CHECK:       if.end:
849; CHECK-NEXT:    ret i32 0
850; CHECK:       if.end2:
851; CHECK-NEXT:    ret i32 1
852;
853; TAILFOLD-LABEL: @multiple_exit_switch2(
854; TAILFOLD-NEXT:  entry:
855; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
856; TAILFOLD:       for.cond:
857; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
858; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
859; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
860; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
861; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
862; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
863; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
864; TAILFOLD-NEXT:    i32 2097, label [[IF_END2:%.*]]
865; TAILFOLD-NEXT:    ]
866; TAILFOLD:       if.end:
867; TAILFOLD-NEXT:    ret i32 0
868; TAILFOLD:       if.end2:
869; TAILFOLD-NEXT:    ret i32 1
870;
871entry:
872  br label %for.cond
873
874for.cond:
875  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
876  %iprom = sext i32 %i to i64
877  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
878  store i16 0, i16* %b, align 4
879  %inc = add nsw i32 %i, 1
  ; The default destination is the loop itself; each listed case leaves the
  ; loop to a different exit block.
880  switch i32 %i, label %for.cond [
881  i32 2096, label %if.end
882  i32 2097, label %if.end2
883  ]
884
885if.end:
886  ret i32 0
887
888if.end2:
889  ret i32 1
890}
891
; Loop in which two blocks (for.body and for.second) can continue iterating;
; both branch to a shared block, for.body.backedge, which jumps back to the
; header.  Both the CHECK and TAILFOLD lines expect the scalar loop with no
; vector body.
892define i32 @multiple_latch1(i16* %p) {
893; CHECK-LABEL: @multiple_latch1(
894; CHECK-NEXT:  entry:
895; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
896; CHECK:       for.body:
897; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
898; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
899; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
900; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
901; CHECK:       for.second:
902; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
903; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
904; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
905; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
906; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
907; CHECK:       for.body.backedge:
908; CHECK-NEXT:    br label [[FOR_BODY]]
909; CHECK:       for.end:
910; CHECK-NEXT:    ret i32 0
911;
912; TAILFOLD-LABEL: @multiple_latch1(
913; TAILFOLD-NEXT:  entry:
914; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
915; TAILFOLD:       for.body:
916; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
917; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
918; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
919; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
920; TAILFOLD:       for.second:
921; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
922; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
923; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
924; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
925; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
926; TAILFOLD:       for.body.backedge:
927; TAILFOLD-NEXT:    br label [[FOR_BODY]]
928; TAILFOLD:       for.end:
929; TAILFOLD-NEXT:    ret i32 0
930;
931entry:
932  br label %for.body
933
934for.body:
935  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge]
936  %inc = add nsw i32 %i.02, 1
937  %cmp = icmp slt i32 %inc, 16
  ; Continue iterating directly while %inc < 16; otherwise run %for.second.
938  br i1 %cmp, label %for.body.backedge, label %for.second
939
940for.second:
941  %iprom = sext i32 %i.02 to i64
942  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
943  store i16 0, i16* %b, align 4
944  %cmps = icmp sgt i32 %inc, 16
  ; Second path back into the loop; the only exit (%for.end) is taken here.
945  br i1 %cmps, label %for.body.backedge, label %for.end
946
947for.body.backedge:
  ; Shared block through which both continuing paths return to the header.
948  br label %for.body
949
950for.end:
951  ret i32 0
952}
953
954
955; Two back branches - loop simplify will convert this to the same form as
956; the previous test before the vectorizer sees it, but show that.
; The input has back edges from both for.body and for.second to the header;
; the expected output contains an inserted for.body.backedge block and no
; vector body, under both the CHECK and TAILFOLD prefixes.
957define i32 @multiple_latch2(i16* %p) {
958; CHECK-LABEL: @multiple_latch2(
959; CHECK-NEXT:  entry:
960; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
961; CHECK:       for.body:
962; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
963; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
964; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
965; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
966; CHECK:       for.body.backedge:
967; CHECK-NEXT:    br label [[FOR_BODY]]
968; CHECK:       for.second:
969; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
970; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
971; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
972; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
973; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
974; CHECK:       for.end:
975; CHECK-NEXT:    ret i32 0
976;
977; TAILFOLD-LABEL: @multiple_latch2(
978; TAILFOLD-NEXT:  entry:
979; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
980; TAILFOLD:       for.body:
981; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
982; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
983; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
984; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
985; TAILFOLD:       for.body.backedge:
986; TAILFOLD-NEXT:    br label [[FOR_BODY]]
987; TAILFOLD:       for.second:
988; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
989; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
990; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
991; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
992; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
993; TAILFOLD:       for.end:
994; TAILFOLD-NEXT:    ret i32 0
995;
996entry:
997  br label %for.body
998
999for.body:
  ; The header phi has two in-loop incoming edges: one from %for.body itself
  ; and one from %for.second.
1000  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
1001  %inc = add nsw i32 %i.02, 1
1002  %cmp = icmp slt i32 %inc, 16
  ; First back branch: straight back to the header while %inc < 16.
1003  br i1 %cmp, label %for.body, label %for.second
1004
1005for.second:
1006  %iprom = sext i32 %i.02 to i64
1007  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
1008  store i16 0, i16* %b, align 4
1009  %cmps = icmp sgt i32 %inc, 16
  ; Second back branch; the only exit (%for.end) is taken from here.
1010  br i1 %cmps, label %for.body, label %for.end
1011
1012for.end:
1013  ret i32 0
1014}
1015
1016
1017; Check interaction between block predication and early exits.  We need the
1018; condition on the early exit to remain dead (i.e. not be used when forming
1019; the predicate mask).
; In the CHECK output the store mask is the negated fcmp result only (xor with
; all-true), and the conditional store is scalarized into pred.store.if
; blocks.  The TAILFOLD lines expect the scalar loop with no vector body.
1020define void @scalar_predication(float* %addr) {
1021; CHECK-LABEL: @scalar_predication(
1022; CHECK-NEXT:  entry:
1023; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1024; CHECK:       vector.ph:
1025; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1026; CHECK:       vector.body:
1027; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
1028; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1029; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]]
1030; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
1031; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
1032; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
1033; CHECK-NEXT:    [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
1034; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
1035; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
1036; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
1037; CHECK:       pred.store.if:
1038; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP0]]
1039; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP7]], align 4
1040; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
1041; CHECK:       pred.store.continue:
1042; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
1043; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
1044; CHECK:       pred.store.if1:
1045; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 1
1046; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP9]]
1047; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP10]], align 4
1048; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
1049; CHECK:       pred.store.continue2:
1050; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1051; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
1052; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
1053; CHECK:       middle.block:
1054; CHECK-NEXT:    br label [[SCALAR_PH]]
1055; CHECK:       scalar.ph:
1056; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1057; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1058; CHECK:       loop.header:
1059; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1060; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]]
1061; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1062; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
1063; CHECK:       loop.body:
1064; CHECK-NEXT:    [[TMP12:%.*]] = load float, float* [[GEP]], align 4
1065; CHECK-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP12]], 0.000000e+00
1066; CHECK-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1067; CHECK:       then:
1068; CHECK-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1069; CHECK-NEXT:    br label [[LOOP_LATCH]]
1070; CHECK:       loop.latch:
1071; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1072; CHECK-NEXT:    br label [[LOOP_HEADER]], !llvm.loop [[LOOP19:![0-9]+]]
1073; CHECK:       exit:
1074; CHECK-NEXT:    ret void
1075;
1076; TAILFOLD-LABEL: @scalar_predication(
1077; TAILFOLD-NEXT:  entry:
1078; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1079; TAILFOLD:       loop.header:
1080; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1081; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]]
1082; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1083; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
1084; TAILFOLD:       loop.body:
1085; TAILFOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[GEP]], align 4
1086; TAILFOLD-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
1087; TAILFOLD-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1088; TAILFOLD:       then:
1089; TAILFOLD-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1090; TAILFOLD-NEXT:    br label [[LOOP_LATCH]]
1091; TAILFOLD:       loop.latch:
1092; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1093; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1094; TAILFOLD:       exit:
1095; TAILFOLD-NEXT:    ret void
1096;
1097entry:
1098  br label %loop.header
1099
1100loop.header:
1101  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1102  %gep = getelementptr float, float* %addr, i64 %iv
1103  %exitcond.not = icmp eq i64 %iv, 200
  ; The only exit is taken from the header; the latch branches back
  ; unconditionally.
1104  br i1 %exitcond.not, label %exit, label %loop.body
1105
1106loop.body:
1107  %0 = load float, float* %gep, align 4
1108  %pred = fcmp oeq float %0, 0.0
  ; The store in %then is skipped when the loaded value compares equal to 0.0.
1109  br i1 %pred, label %loop.latch, label %then
1110
1111then:
1112  store float 10.0, float* %gep, align 4
1113  br label %loop.latch
1114
1115loop.latch:
1116  %iv.next = add nuw nsw i64 %iv, 1
1117  br label %loop.header
1118
1119exit:
1120  ret void
1121}
1122
; Integer add reduction in a loop with two exiting blocks (loop.header and
; loop.latch) that share one exit block.  The exit phi %lcssa takes 0 on the
; loop.header edge and %accum.next on the loop.latch edge.
; The CHECK lines expect a <2 x i32> vector body whose partial sums are
; combined with llvm.vector.reduce.add in middle.block, which then branches
; unconditionally to scalar.ph.  The TAILFOLD lines expect the scalar loop with
; no vector body.
1123define i32 @me_reduction(i32* %addr) {
1124; CHECK-LABEL: @me_reduction(
1125; CHECK-NEXT:  entry:
1126; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1127; CHECK:       vector.ph:
1128; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1129; CHECK:       vector.body:
1130; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1131; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
1132; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1133; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]]
1134; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0
1135; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
1136; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
1137; CHECK-NEXT:    [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
1138; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1139; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
1140; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1141; CHECK:       middle.block:
1142; CHECK-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP5]])
1143; CHECK-NEXT:    br label [[SCALAR_PH]]
1144; CHECK:       scalar.ph:
1145; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1146; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1147; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1148; CHECK:       loop.header:
1149; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1150; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1151; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]]
1152; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1153; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1154; CHECK:       loop.latch:
1155; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4
1156; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]]
1157; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1158; CHECK-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1159; CHECK-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]]
1160; CHECK:       exit:
1161; CHECK-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1162; CHECK-NEXT:    ret i32 [[LCSSA]]
1163;
1164; TAILFOLD-LABEL: @me_reduction(
1165; TAILFOLD-NEXT:  entry:
1166; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1167; TAILFOLD:       loop.header:
1168; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1169; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1170; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1171; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1172; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1173; TAILFOLD:       loop.latch:
1174; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1175; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1176; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1177; TAILFOLD-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1178; TAILFOLD-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]]
1179; TAILFOLD:       exit:
1180; TAILFOLD-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1181; TAILFOLD-NEXT:    ret i32 [[LCSSA]]
1182;
1183entry:
1184  br label %loop.header
1185
1186loop.header:
1187  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1188  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1189  %gep = getelementptr i32, i32* %addr, i64 %iv
1190  %exitcond.not = icmp eq i64 %iv, 200
  ; First exit: taken from the header when %iv equals 200.
1191  br i1 %exitcond.not, label %exit, label %loop.latch
1192
1193loop.latch:
1194  %0 = load i32, i32* %gep, align 4
1195  %accum.next = add i32 %accum, %0
1196  %iv.next = add nuw nsw i64 %iv, 1
1197  %exitcond2.not = icmp eq i64 %iv, 400
  ; Second exit: taken from the latch when %iv equals 400.
1198  br i1 %exitcond2.not, label %exit, label %loop.header
1199
1200exit:
  ; The returned value depends on which exiting edge was taken.
1201  %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch]
1202  ret i32 %lcssa
1203}
1204
1205; TODO: The current definition of reduction is too strict, we can vectorize
1206; this.  There's an analogous single exit case where we extract the N-1
1207; value of the reduction that we can also handle.  If we fix the latter, the
1208; multiple exit case probably falls out.
; For now both the CHECK and TAILFOLD lines expect the scalar loop with no
; vector body.
1209define i32 @me_reduction2(i32* %addr) {
1210; CHECK-LABEL: @me_reduction2(
1211; CHECK-NEXT:  entry:
1212; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1213; CHECK:       loop.header:
1214; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1215; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1216; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1217; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1218; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1219; CHECK:       loop.latch:
1220; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1221; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1222; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1223; CHECK-NEXT:    br label [[LOOP_HEADER]]
1224; CHECK:       exit:
1225; CHECK-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1226; CHECK-NEXT:    ret i32 [[ACCUM_LCSSA]]
1227;
1228; TAILFOLD-LABEL: @me_reduction2(
1229; TAILFOLD-NEXT:  entry:
1230; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1231; TAILFOLD:       loop.header:
1232; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1233; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1234; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1235; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1236; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1237; TAILFOLD:       loop.latch:
1238; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1239; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1240; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1241; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1242; TAILFOLD:       exit:
1243; TAILFOLD-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1244; TAILFOLD-NEXT:    ret i32 [[ACCUM_LCSSA]]
1245;
1246entry:
1247  br label %loop.header
1248
1249loop.header:
1250  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1251  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1252  %gep = getelementptr i32, i32* %addr, i64 %iv
1253  %exitcond.not = icmp eq i64 %iv, 200
  ; The only exit is taken from the header; the latch branches back
  ; unconditionally.
1254  br i1 %exitcond.not, label %exit, label %loop.latch
1255
1256loop.latch:
1257  %0 = load i32, i32* %gep, align 4
1258  %accum.next = add i32 %accum, %0
1259  %iv.next = add nuw nsw i64 %iv, 1
1260  br label %loop.header
1261
1262exit:
  ; Returns the header phi %accum rather than the updated %accum.next.
1263  ret i32 %accum
1264}
1265
1266