1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
3; RUN: opt -S -loop-vectorize -force-vector-width=2  -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s
4
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6
; Single-block loop: for.cond is both the header and the only exiting block,
; so the exit test sits at the bottom of the loop. Each iteration stores an
; i16 zero to %p[%i] and continues while the pre-increment %i is less than %n.
; Expected results below: the default run vectorizes with width 2 and keeps a
; scalar remainder loop; the predicate-dont-vectorize run folds the tail with
; per-lane predicated stores, and its middle.block always branches to if.end.
7define void @bottom_tested(i16* %p, i32 %n) {
8; CHECK-LABEL: @bottom_tested(
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
11; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
12; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
13; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
14; CHECK:       vector.ph:
15; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
16; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
17; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
18; CHECK:       vector.body:
19; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
20; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 0
21; CHECK-NEXT:    [[TMP2:%.*]] = sext i32 [[TMP1]] to i64
22; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP2]]
23; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
24; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <2 x i16>*
25; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP5]], align 4
26; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
27; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
28; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
29; CHECK:       middle.block:
30; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
31; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
32; CHECK:       scalar.ph:
33; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
34; CHECK-NEXT:    br label [[FOR_COND:%.*]]
35; CHECK:       for.cond:
36; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
37; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
38; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
39; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
40; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
41; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
42; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
43; CHECK:       if.end:
44; CHECK-NEXT:    ret void
45;
46; TAILFOLD-LABEL: @bottom_tested(
47; TAILFOLD-NEXT:  entry:
48; TAILFOLD-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
49; TAILFOLD-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
50; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
51; TAILFOLD:       vector.ph:
52; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP0]], 1
53; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
54; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
55; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP0]], 1
56; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
57; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
58; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
59; TAILFOLD:       vector.body:
60; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
61; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
62; TAILFOLD-NEXT:    [[TMP1:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
63; TAILFOLD-NEXT:    [[TMP2:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
64; TAILFOLD-NEXT:    [[TMP3:%.*]] = extractelement <2 x i1> [[TMP1]], i32 0
65; TAILFOLD-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
66; TAILFOLD:       pred.store.if:
67; TAILFOLD-NEXT:    [[TMP4:%.*]] = extractelement <2 x i64> [[TMP2]], i32 0
68; TAILFOLD-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]]
69; TAILFOLD-NEXT:    store i16 0, i16* [[TMP5]], align 4
70; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
71; TAILFOLD:       pred.store.continue:
72; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP1]], i32 1
73; TAILFOLD-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
74; TAILFOLD:       pred.store.if1:
75; TAILFOLD-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP2]], i32 1
76; TAILFOLD-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP7]]
77; TAILFOLD-NEXT:    store i16 0, i16* [[TMP8]], align 4
78; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
79; TAILFOLD:       pred.store.continue2:
80; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
81; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
82; TAILFOLD-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
83; TAILFOLD-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
84; TAILFOLD:       middle.block:
85; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
86; TAILFOLD:       scalar.ph:
87; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
88; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
89; TAILFOLD:       for.cond:
90; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
91; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
92; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
93; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
94; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
95; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
96; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP2:![0-9]+]]
97; TAILFOLD:       if.end:
98; TAILFOLD-NEXT:    ret void
99;
100entry:
101  br label %for.cond
102
; Loop header and latch in one block; the store runs before the exit test.
103for.cond:
104  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
105  %iprom = sext i32 %i to i64
106  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
107  store i16 0, i16* %b, align 4
108  %inc = add nsw i32 %i, 1
  ; The comparison uses %i (pre-increment), not %inc.
109  %cmp = icmp slt i32 %i, %n
110  br i1 %cmp, label %for.cond, label %if.end
111
112if.end:
113  ret void
114}
115
; Top-tested loop: the exit test lives in the header (for.cond), while the
; latch (for.body) performs the i16 zero store and branches back
; unconditionally. Expected results below: the default run vectorizes with
; width 2 and its middle.block branches unconditionally to scalar.ph, so the
; scalar loop always executes after the vector loop; the
; predicate-dont-vectorize run leaves the loop in its original scalar form.
116define void @early_exit(i16* %p, i32 %n) {
117; CHECK-LABEL: @early_exit(
118; CHECK-NEXT:  entry:
119; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
120; CHECK-NEXT:    [[TMP0:%.*]] = add nuw i32 [[SMAX]], 1
121; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
122; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
123; CHECK:       vector.ph:
124; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
125; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
126; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
127; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
128; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
129; CHECK:       vector.body:
130; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
131; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
132; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
133; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]]
134; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
135; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>*
136; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
137; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
138; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
139; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
140; CHECK:       middle.block:
141; CHECK-NEXT:    br label [[SCALAR_PH]]
142; CHECK:       scalar.ph:
143; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
144; CHECK-NEXT:    br label [[FOR_COND:%.*]]
145; CHECK:       for.cond:
146; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
147; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
148; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
149; CHECK:       for.body:
150; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
151; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
152; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
153; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
154; CHECK-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]]
155; CHECK:       if.end:
156; CHECK-NEXT:    ret void
157;
158; TAILFOLD-LABEL: @early_exit(
159; TAILFOLD-NEXT:  entry:
160; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
161; TAILFOLD:       for.cond:
162; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
163; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
164; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
165; TAILFOLD:       for.body:
166; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
167; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
168; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
169; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
170; TAILFOLD-NEXT:    br label [[FOR_COND]]
171; TAILFOLD:       if.end:
172; TAILFOLD-NEXT:    ret void
173;
174entry:
175  br label %for.cond
176
; Header: the only exiting block; leaves the loop before any store when
; %i is not less than %n.
177for.cond:
178  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
179  %cmp = icmp slt i32 %i, %n
180  br i1 %cmp, label %for.body, label %if.end
181
; Latch: does the store and increment, then branches back unconditionally.
182for.body:
183  %iprom = sext i32 %i to i64
184  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
185  store i16 0, i16* %b, align 4
186  %inc = add nsw i32 %i, 1
187  br label %for.cond
188
189if.end:
190  ret void
191}
192
; Loop that exits from its header and returns a value computed inside the
; loop: loop.header loads %l from %ptr[%iv] and exits once %iv.next equals
; 1000; loop.latch stores 10 to the same address and branches back. The loaded
; value %l reaches the return through the %res.lcssa phi in the exit block.
; Expected results below: the default run vectorizes with width 2, stops the
; vector loop at index 998 and always continues in the scalar loop; the
; predicate-dont-vectorize run leaves the loop scalar.
193define i32 @early_exit_with_live_out(i32* %ptr) {
194; CHECK-LABEL: @early_exit_with_live_out(
195; CHECK-NEXT:  entry:
196; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
197; CHECK:       vector.ph:
198; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
199; CHECK:       vector.body:
200; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
201; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
202; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[TMP0]]
203; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 0
204; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
205; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
206; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
207; CHECK-NEXT:    store <2 x i32> <i32 10, i32 10>, <2 x i32>* [[TMP4]], align 4
208; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
209; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 998
210; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
211; CHECK:       middle.block:
212; CHECK-NEXT:    br label [[SCALAR_PH]]
213; CHECK:       scalar.ph:
214; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 998, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
215; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
216; CHECK:       loop.header:
217; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
218; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[PTR]], i64 [[IV]]
219; CHECK-NEXT:    [[L:%.*]] = load i32, i32* [[GEP]], align 4
220; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
221; CHECK-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
222; CHECK-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
223; CHECK:       loop.latch:
224; CHECK-NEXT:    store i32 10, i32* [[GEP]], align 4
225; CHECK-NEXT:    br label [[LOOP_HEADER]], !llvm.loop [[LOOP7:![0-9]+]]
226; CHECK:       exit:
227; CHECK-NEXT:    [[RES_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_HEADER]] ]
228; CHECK-NEXT:    ret i32 [[RES_LCSSA]]
229;
230; TAILFOLD-LABEL: @early_exit_with_live_out(
231; TAILFOLD-NEXT:  entry:
232; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
233; TAILFOLD:       loop.header:
234; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
235; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[PTR:%.*]], i64 [[IV]]
236; TAILFOLD-NEXT:    [[L:%.*]] = load i32, i32* [[GEP]], align 4
237; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
238; TAILFOLD-NEXT:    [[EC:%.*]] = icmp eq i64 [[IV_NEXT]], 1000
239; TAILFOLD-NEXT:    br i1 [[EC]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
240; TAILFOLD:       loop.latch:
241; TAILFOLD-NEXT:    store i32 10, i32* [[GEP]], align 4
242; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
243; TAILFOLD:       exit:
244; TAILFOLD-NEXT:    [[RES_LCSSA:%.*]] = phi i32 [ [[L]], [[LOOP_HEADER]] ]
245; TAILFOLD-NEXT:    ret i32 [[RES_LCSSA]]
246;
247entry:
248  br label %loop.header
249
; Header: loads the value that is live out of the loop and holds the only
; exit test.
250loop.header:
251  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
252  %gep = getelementptr i32, i32* %ptr, i64 %iv
253  %l = load i32, i32* %gep
254  %iv.next = add nuw nsw i64 %iv, 1
255  %ec = icmp eq i64 %iv.next, 1000
256  br i1 %ec, label %exit, label %loop.latch
257
; Latch: the store happens only on iterations that do not exit.
258loop.latch:
259  store i32 10, i32* %gep
260  br label %loop.header
261
262exit:
263  %res.lcssa = phi i32 [ %l, %loop.header ]
264  ret i32 %res.lcssa
265}
266
267; Same as early_exit, but with optsize to prevent the use of
268; a scalar epilogue.  -- Can't vectorize this in either case.
; The loop body matches @early_exit (exit test in for.cond, store and
; increment in for.body), but the function carries the optsize attribute.
; Both runs below expect the loop to remain in its original scalar form,
; with no vector blocks emitted.
269define void @optsize(i16* %p, i32 %n) optsize {
270; CHECK-LABEL: @optsize(
271; CHECK-NEXT:  entry:
272; CHECK-NEXT:    br label [[FOR_COND:%.*]]
273; CHECK:       for.cond:
274; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
275; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
276; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
277; CHECK:       for.body:
278; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
279; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
280; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
281; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
282; CHECK-NEXT:    br label [[FOR_COND]]
283; CHECK:       if.end:
284; CHECK-NEXT:    ret void
285;
286; TAILFOLD-LABEL: @optsize(
287; TAILFOLD-NEXT:  entry:
288; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
289; TAILFOLD:       for.cond:
290; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
291; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
292; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
293; TAILFOLD:       for.body:
294; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
295; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
296; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
297; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
298; TAILFOLD-NEXT:    br label [[FOR_COND]]
299; TAILFOLD:       if.end:
300; TAILFOLD-NEXT:    ret void
301;
302entry:
303  br label %for.cond
304
; Header: the only exiting block.
305for.cond:
306  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
307  %cmp = icmp slt i32 %i, %n
308  br i1 %cmp, label %for.body, label %if.end
309
; Latch: store and increment, then an unconditional backedge.
310for.body:
311  %iprom = sext i32 %i to i64
312  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
313  store i16 0, i16* %b, align 4
314  %inc = add nsw i32 %i, 1
315  br label %for.cond
316
317if.end:
318  ret void
319}
320
321
322; multiple exit - no values inside the loop used outside
; Loop with two exiting blocks that share one exit block: for.cond leaves
; when %i is not less than %n, and for.body leaves when %i is not less than
; 2096. Both edges target if.end, which uses no value from the loop.
; Expected results below: the default run vectorizes with width 2 and always
; continues in the scalar loop after the vector loop; the
; predicate-dont-vectorize run leaves the loop scalar.
323define void @multiple_unique_exit(i16* %p, i32 %n) {
324; CHECK-LABEL: @multiple_unique_exit(
325; CHECK-NEXT:  entry:
326; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
327; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
328; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
329; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
330; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
331; CHECK:       vector.ph:
332; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
333; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
334; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
335; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
336; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
337; CHECK:       vector.body:
338; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
339; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
340; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
341; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]]
342; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
343; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>*
344; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
345; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
346; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
347; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
348; CHECK:       middle.block:
349; CHECK-NEXT:    br label [[SCALAR_PH]]
350; CHECK:       scalar.ph:
351; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
352; CHECK-NEXT:    br label [[FOR_COND:%.*]]
353; CHECK:       for.cond:
354; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
355; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
356; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
357; CHECK:       for.body:
358; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
359; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
360; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
361; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
362; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
363; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP9:![0-9]+]]
364; CHECK:       if.end:
365; CHECK-NEXT:    ret void
366;
367; TAILFOLD-LABEL: @multiple_unique_exit(
368; TAILFOLD-NEXT:  entry:
369; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
370; TAILFOLD:       for.cond:
371; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
372; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
373; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
374; TAILFOLD:       for.body:
375; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
376; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
377; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
378; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
379; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
380; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
381; TAILFOLD:       if.end:
382; TAILFOLD-NEXT:    ret void
383;
384entry:
385  br label %for.cond
386
; First exiting block: bound check against the argument %n.
387for.cond:
388  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
389  %cmp = icmp slt i32 %i, %n
390  br i1 %cmp, label %for.body, label %if.end
391
; Second exiting block (also the latch): bound check against the constant
; 2096, using the pre-increment %i.
392for.body:
393  %iprom = sext i32 %i to i64
394  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
395  store i16 0, i16* %b, align 4
396  %inc = add nsw i32 %i, 1
397  %cmp2 = icmp slt i32 %i, 2096
398  br i1 %cmp2, label %for.cond, label %if.end
399
400if.end:
401  ret void
402}
403
404; multiple exit - with an lcssa phi
; Same two-exit loop as @multiple_unique_exit, except that the induction value
; %i is returned from if.end and is therefore used outside the loop. The
; expected output of both runs shows that use going through a phi in if.end
; with an incoming value from each exiting block.
; Expected results below: the default run vectorizes with width 2 and always
; continues in the scalar loop; the predicate-dont-vectorize run leaves the
; loop scalar.
405define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
406; CHECK-LABEL: @multiple_unique_exit2(
407; CHECK-NEXT:  entry:
408; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
409; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
410; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
411; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
412; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
413; CHECK:       vector.ph:
414; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
415; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
416; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
417; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
418; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
419; CHECK:       vector.body:
420; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
421; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
422; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
423; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]]
424; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
425; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>*
426; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
427; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
428; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
429; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
430; CHECK:       middle.block:
431; CHECK-NEXT:    br label [[SCALAR_PH]]
432; CHECK:       scalar.ph:
433; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
434; CHECK-NEXT:    br label [[FOR_COND:%.*]]
435; CHECK:       for.cond:
436; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
437; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
438; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
439; CHECK:       for.body:
440; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
441; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
442; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
443; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
444; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
445; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP11:![0-9]+]]
446; CHECK:       if.end:
447; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
448; CHECK-NEXT:    ret i32 [[I_LCSSA]]
449;
450; TAILFOLD-LABEL: @multiple_unique_exit2(
451; TAILFOLD-NEXT:  entry:
452; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
453; TAILFOLD:       for.cond:
454; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
455; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
456; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
457; TAILFOLD:       for.body:
458; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
459; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
460; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
461; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
462; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
463; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
464; TAILFOLD:       if.end:
465; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
466; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
467;
468entry:
469  br label %for.cond
470
; First exiting block: bound check against the argument %n.
471for.cond:
472  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
473  %cmp = icmp slt i32 %i, %n
474  br i1 %cmp, label %for.body, label %if.end
475
; Second exiting block (also the latch): bound check against 2096.
476for.body:
477  %iprom = sext i32 %i to i64
478  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
479  store i16 0, i16* %b, align 4
480  %inc = add nsw i32 %i, 1
481  %cmp2 = icmp slt i32 %i, 2096
482  br i1 %cmp2, label %for.cond, label %if.end
483
; The input uses %i directly here, without an explicit phi.
484if.end:
485  ret i32 %i
486}
487
488; multiple exit w/a non lcssa phi
; Same two-exit loop as @multiple_unique_exit, but the shared exit block
; if.end holds a phi whose incoming values are constants that identify the
; exiting block taken: 0 when leaving from for.cond, 1 when leaving from
; for.body. That phi's value is returned.
; Expected results below: the default run vectorizes with width 2 and always
; continues in the scalar loop; the predicate-dont-vectorize run leaves the
; loop scalar.
489define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
490; CHECK-LABEL: @multiple_unique_exit3(
491; CHECK-NEXT:  entry:
492; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
493; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
494; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
495; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
496; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
497; CHECK:       vector.ph:
498; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
499; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
500; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
501; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
502; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
503; CHECK:       vector.body:
504; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
505; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
506; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
507; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]]
508; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
509; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>*
510; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
511; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
512; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
513; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
514; CHECK:       middle.block:
515; CHECK-NEXT:    br label [[SCALAR_PH]]
516; CHECK:       scalar.ph:
517; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
518; CHECK-NEXT:    br label [[FOR_COND:%.*]]
519; CHECK:       for.cond:
520; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
521; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
522; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
523; CHECK:       for.body:
524; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
525; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
526; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
527; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
528; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
529; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], !llvm.loop [[LOOP13:![0-9]+]]
530; CHECK:       if.end:
531; CHECK-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
532; CHECK-NEXT:    ret i32 [[EXIT]]
533;
534; TAILFOLD-LABEL: @multiple_unique_exit3(
535; TAILFOLD-NEXT:  entry:
536; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
537; TAILFOLD:       for.cond:
538; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
539; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
540; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
541; TAILFOLD:       for.body:
542; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
543; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
544; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
545; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
546; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
547; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
548; TAILFOLD:       if.end:
549; TAILFOLD-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
550; TAILFOLD-NEXT:    ret i32 [[EXIT]]
551;
552entry:
553  br label %for.cond
554
; First exiting block: bound check against the argument %n.
555for.cond:
556  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
557  %cmp = icmp slt i32 %i, %n
558  br i1 %cmp, label %for.body, label %if.end
559
; Second exiting block (also the latch): bound check against 2096.
560for.body:
561  %iprom = sext i32 %i to i64
562  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
563  store i16 0, i16* %b, align 4
564  %inc = add nsw i32 %i, 1
565  %cmp2 = icmp slt i32 %i, 2096
566  br i1 %cmp2, label %for.cond, label %if.end
567
; The returned constant records which exiting block was taken.
568if.end:
569  %exit = phi i32 [0, %for.cond], [1, %for.body]
570  ret i32 %exit
571}
572
573; multiple exits w/distinct target blocks
; Two-exit loop whose exits lead to different blocks: for.cond leaves to
; if.end (returns 0) when %i is not less than %n, and for.body leaves to
; if.end2 (returns 1) when %i is not less than 2096.
; Expected results below: the default run vectorizes with width 2 and always
; continues in the scalar loop after the vector loop; the
; predicate-dont-vectorize run leaves the loop scalar.
574define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
575; CHECK-LABEL: @multiple_exit_blocks(
576; CHECK-NEXT:  entry:
577; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
578; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
579; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
580; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
581; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
582; CHECK:       vector.ph:
583; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
584; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
585; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
586; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
587; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
588; CHECK:       vector.body:
589; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
590; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
591; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
592; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]]
593; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
594; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>*
595; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
596; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
597; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
598; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
599; CHECK:       middle.block:
600; CHECK-NEXT:    br label [[SCALAR_PH]]
601; CHECK:       scalar.ph:
602; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
603; CHECK-NEXT:    br label [[FOR_COND:%.*]]
604; CHECK:       for.cond:
605; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
606; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
607; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
608; CHECK:       for.body:
609; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
610; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
611; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
612; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
613; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
614; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP15:![0-9]+]]
615; CHECK:       if.end:
616; CHECK-NEXT:    ret i32 0
617; CHECK:       if.end2:
618; CHECK-NEXT:    ret i32 1
619;
620; TAILFOLD-LABEL: @multiple_exit_blocks(
621; TAILFOLD-NEXT:  entry:
622; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
623; TAILFOLD:       for.cond:
624; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
625; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
626; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
627; TAILFOLD:       for.body:
628; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
629; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
630; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
631; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
632; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
633; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
634; TAILFOLD:       if.end:
635; TAILFOLD-NEXT:    ret i32 0
636; TAILFOLD:       if.end2:
637; TAILFOLD-NEXT:    ret i32 1
638;
639entry:
640  br label %for.cond
641
; First exiting block: leaves to if.end.
642for.cond:
643  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
644  %cmp = icmp slt i32 %i, %n
645  br i1 %cmp, label %for.body, label %if.end
646
; Second exiting block (also the latch): leaves to if.end2.
647for.body:
648  %iprom = sext i32 %i to i64
649  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
650  store i16 0, i16* %b, align 4
651  %inc = add nsw i32 %i, 1
652  %cmp2 = icmp slt i32 %i, 2096
653  br i1 %cmp2, label %for.cond, label %if.end2
654
655if.end:
656  ret i32 0
657
658if.end2:
659  ret i32 1
660}
661
662; LCSSA, common value each exit
; The loop has two exiting blocks, each with its own exit block: for.cond
; leaves to if.end once %i is no longer signed-less-than %n, and for.body
; leaves to if.end2 once %i is no longer signed-less-than 2096.  Both exit
; blocks return %i, so the same loop value is live out along each exit.
; With the default options the expected output is a <2 x i16> vector loop
; whose middle block always branches to the scalar loop, which takes the
; actual exits.  With the tail-folding options the expected output keeps the
; loop in its original scalar form.
663define i32 @multiple_exit_blocks2(i16* %p, i32 %n) {
664; CHECK-LABEL: @multiple_exit_blocks2(
665; CHECK-NEXT:  entry:
666; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
667; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
668; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
669; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
670; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
671; CHECK:       vector.ph:
672; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
673; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
674; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
675; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
676; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
677; CHECK:       vector.body:
678; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
679; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
680; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
681; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]]
682; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
683; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>*
684; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
685; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
686; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
687; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
688; CHECK:       middle.block:
689; CHECK-NEXT:    br label [[SCALAR_PH]]
690; CHECK:       scalar.ph:
691; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
692; CHECK-NEXT:    br label [[FOR_COND:%.*]]
693; CHECK:       for.cond:
694; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
695; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
696; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
697; CHECK:       for.body:
698; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
699; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
700; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
701; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
702; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
703; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP17:![0-9]+]]
704; CHECK:       if.end:
705; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
706; CHECK-NEXT:    ret i32 [[I_LCSSA]]
707; CHECK:       if.end2:
708; CHECK-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
709; CHECK-NEXT:    ret i32 [[I_LCSSA1]]
710;
711; TAILFOLD-LABEL: @multiple_exit_blocks2(
712; TAILFOLD-NEXT:  entry:
713; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
714; TAILFOLD:       for.cond:
715; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
716; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
717; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
718; TAILFOLD:       for.body:
719; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
720; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
721; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
722; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
723; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
724; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
725; TAILFOLD:       if.end:
726; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
727; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
728; TAILFOLD:       if.end2:
729; TAILFOLD-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
730; TAILFOLD-NEXT:    ret i32 [[I_LCSSA1]]
731;
732entry:
733  br label %for.cond
734
735for.cond:
736  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
737  %cmp = icmp slt i32 %i, %n
  ; First exit: taken from the loop header when %i >= %n (signed).
738  br i1 %cmp, label %for.body, label %if.end
739
740for.body:
741  %iprom = sext i32 %i to i64
742  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
743  store i16 0, i16* %b, align 4
744  %inc = add nsw i32 %i, 1
745  %cmp2 = icmp slt i32 %i, 2096
  ; Second exit: taken from the latch when %i >= 2096 (signed).
746  br i1 %cmp2, label %for.cond, label %if.end2
747
748if.end:
749  ret i32 %i
750
751if.end2:
  ; Same live-out value as if.end.
752  ret i32 %i
753}
754
755; LCSSA, distinct value each exit
; Same two-exit loop shape as a header exit on %i vs. %n plus a latch exit on
; %i vs. 2096, but the exit blocks return different loop values: if.end
; returns %i while if.end2 returns %inc.
; With the default options the expected output is a <2 x i16> vector loop
; whose middle block always branches to the scalar loop, and each exit block
; receives its own LCSSA phi.  With the tail-folding options the expected
; output keeps the loop in its original scalar form.
756define i32 @multiple_exit_blocks3(i16* %p, i32 %n) {
757; CHECK-LABEL: @multiple_exit_blocks3(
758; CHECK-NEXT:  entry:
759; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 0)
760; CHECK-NEXT:    [[UMIN:%.*]] = call i32 @llvm.umin.i32(i32 [[SMAX]], i32 2096)
761; CHECK-NEXT:    [[TMP0:%.*]] = add nuw nsw i32 [[UMIN]], 1
762; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP0]], 2
763; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
764; CHECK:       vector.ph:
765; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
766; CHECK-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
767; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], i32 2, i32 [[N_MOD_VF]]
768; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[TMP2]]
769; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
770; CHECK:       vector.body:
771; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
772; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 0
773; CHECK-NEXT:    [[TMP4:%.*]] = sext i32 [[TMP3]] to i64
774; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP4]]
775; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
776; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <2 x i16>*
777; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP7]], align 4
778; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
779; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
780; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
781; CHECK:       middle.block:
782; CHECK-NEXT:    br label [[SCALAR_PH]]
783; CHECK:       scalar.ph:
784; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
785; CHECK-NEXT:    br label [[FOR_COND:%.*]]
786; CHECK:       for.cond:
787; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
788; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
789; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
790; CHECK:       for.body:
791; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
792; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
793; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
794; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
795; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
796; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]], !llvm.loop [[LOOP19:![0-9]+]]
797; CHECK:       if.end:
798; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
799; CHECK-NEXT:    ret i32 [[I_LCSSA]]
800; CHECK:       if.end2:
801; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
802; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
803;
804; TAILFOLD-LABEL: @multiple_exit_blocks3(
805; TAILFOLD-NEXT:  entry:
806; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
807; TAILFOLD:       for.cond:
808; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
809; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
810; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
811; TAILFOLD:       for.body:
812; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
813; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
814; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
815; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
816; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
817; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
818; TAILFOLD:       if.end:
819; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
820; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
821; TAILFOLD:       if.end2:
822; TAILFOLD-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
823; TAILFOLD-NEXT:    ret i32 [[INC_LCSSA]]
824;
825entry:
826  br label %for.cond
827
828for.cond:
829  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
830  %cmp = icmp slt i32 %i, %n
  ; First exit: taken from the loop header when %i >= %n (signed).
831  br i1 %cmp, label %for.body, label %if.end
832
833for.body:
834  %iprom = sext i32 %i to i64
835  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
836  store i16 0, i16* %b, align 4
837  %inc = add nsw i32 %i, 1
838  %cmp2 = icmp slt i32 %i, 2096
  ; Second exit: taken from the latch when %i >= 2096 (signed).
839  br i1 %cmp2, label %for.cond, label %if.end2
840
841if.end:
  ; Live-out here is the induction variable itself.
842  ret i32 %i
843
844if.end2:
  ; Live-out here is the incremented induction variable.
845  ret i32 %inc
846}
847
848; unique exit case but with a switch as two edges between the same pair of
849; blocks is an often missed edge case
; The single-block loop ends in a switch on %i: the default destination is
; the loop itself, while both case values (2096 and 2097) target if.end, so
; there are two edges from for.cond to the one exit block.  if.end returns %i.
; The expected output for both the default and the tail-folding options keeps
; the loop in its original scalar form; the exit phi carries one incoming
; entry per switch edge.
850define i32 @multiple_exit_switch(i16* %p, i32 %n) {
851; CHECK-LABEL: @multiple_exit_switch(
852; CHECK-NEXT:  entry:
853; CHECK-NEXT:    br label [[FOR_COND:%.*]]
854; CHECK:       for.cond:
855; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
856; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
857; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
858; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
859; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
860; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
861; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
862; CHECK-NEXT:    i32 2097, label [[IF_END]]
863; CHECK-NEXT:    ]
864; CHECK:       if.end:
865; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
866; CHECK-NEXT:    ret i32 [[I_LCSSA]]
867;
868; TAILFOLD-LABEL: @multiple_exit_switch(
869; TAILFOLD-NEXT:  entry:
870; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
871; TAILFOLD:       for.cond:
872; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
873; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
874; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
875; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
876; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
877; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
878; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
879; TAILFOLD-NEXT:    i32 2097, label [[IF_END]]
880; TAILFOLD-NEXT:    ]
881; TAILFOLD:       if.end:
882; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
883; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
884;
885entry:
886  br label %for.cond
887
888for.cond:
889  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
890  %iprom = sext i32 %i to i64
891  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
892  store i16 0, i16* %b, align 4
893  %inc = add nsw i32 %i, 1
  ; Default edge is the backedge; both case edges leave to the same block.
894  switch i32 %i, label %for.cond [
895  i32 2096, label %if.end
896  i32 2097, label %if.end
897  ]
898
899if.end:
900  ret i32 %i
901}
902
903; multiple exit case but with a switch as multiple exiting edges from
904; a single block is a commonly missed edge case
; The single-block loop ends in a switch on %i: the default destination is
; the loop itself, case 2096 leaves to if.end (returns 0) and case 2097
; leaves to if.end2 (returns 1), so one exiting block feeds two distinct
; exit blocks.
; The expected output for both the default and the tail-folding options keeps
; the loop in its original scalar form.
905define i32 @multiple_exit_switch2(i16* %p, i32 %n) {
906; CHECK-LABEL: @multiple_exit_switch2(
907; CHECK-NEXT:  entry:
908; CHECK-NEXT:    br label [[FOR_COND:%.*]]
909; CHECK:       for.cond:
910; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
911; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
912; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
913; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
914; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
915; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
916; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
917; CHECK-NEXT:    i32 2097, label [[IF_END2:%.*]]
918; CHECK-NEXT:    ]
919; CHECK:       if.end:
920; CHECK-NEXT:    ret i32 0
921; CHECK:       if.end2:
922; CHECK-NEXT:    ret i32 1
923;
924; TAILFOLD-LABEL: @multiple_exit_switch2(
925; TAILFOLD-NEXT:  entry:
926; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
927; TAILFOLD:       for.cond:
928; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
929; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
930; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
931; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
932; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
933; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
934; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
935; TAILFOLD-NEXT:    i32 2097, label [[IF_END2:%.*]]
936; TAILFOLD-NEXT:    ]
937; TAILFOLD:       if.end:
938; TAILFOLD-NEXT:    ret i32 0
939; TAILFOLD:       if.end2:
940; TAILFOLD-NEXT:    ret i32 1
941;
942entry:
943  br label %for.cond
944
945for.cond:
946  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
947  %iprom = sext i32 %i to i64
948  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
949  store i16 0, i16* %b, align 4
950  %inc = add nsw i32 %i, 1
  ; Default edge is the backedge; each case edge leaves to a different block.
951  switch i32 %i, label %for.cond [
952  i32 2096, label %if.end
953  i32 2097, label %if.end2
954  ]
955
956if.end:
957  ret i32 0
958
959if.end2:
960  ret i32 1
961}
962
; Loop whose two conditional branches both feed a dedicated backedge block.
; for.body continues to for.body.backedge when %inc < 16 (signed) and
; otherwise falls into for.second, which stores to %p and then either
; continues to for.body.backedge when %inc > 16 (signed) or exits to for.end.
; for.body.backedge is the only block that branches back to for.body.
; The expected output for both the default and the tail-folding options keeps
; the loop in its original scalar form.
963define i32 @multiple_latch1(i16* %p) {
964; CHECK-LABEL: @multiple_latch1(
965; CHECK-NEXT:  entry:
966; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
967; CHECK:       for.body:
968; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
969; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
970; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
971; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
972; CHECK:       for.second:
973; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
974; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
975; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
976; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
977; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
978; CHECK:       for.body.backedge:
979; CHECK-NEXT:    br label [[FOR_BODY]]
980; CHECK:       for.end:
981; CHECK-NEXT:    ret i32 0
982;
983; TAILFOLD-LABEL: @multiple_latch1(
984; TAILFOLD-NEXT:  entry:
985; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
986; TAILFOLD:       for.body:
987; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
988; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
989; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
990; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
991; TAILFOLD:       for.second:
992; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
993; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
994; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
995; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
996; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
997; TAILFOLD:       for.body.backedge:
998; TAILFOLD-NEXT:    br label [[FOR_BODY]]
999; TAILFOLD:       for.end:
1000; TAILFOLD-NEXT:    ret i32 0
1001;
1002entry:
1003  br label %for.body
1004
1005for.body:
1006  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge]
1007  %inc = add nsw i32 %i.02, 1
1008  %cmp = icmp slt i32 %inc, 16
  ; First path to the backedge block; no exit from this block.
1009  br i1 %cmp, label %for.body.backedge, label %for.second
1010
1011for.second:
1012  %iprom = sext i32 %i.02 to i64
1013  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
1014  store i16 0, i16* %b, align 4
1015  %cmps = icmp sgt i32 %inc, 16
  ; Second path to the backedge block, and the only exit from the loop.
1016  br i1 %cmps, label %for.body.backedge, label %for.end
1017
1018for.body.backedge:
1019  br label %for.body
1020
1021for.end:
1022  ret i32 0
1023}
1024
1025
1026; two back branches - loop simplify will convert this to the same form
1027; as previous before vectorizer sees it, but show that.
; In the input, both for.body and for.second branch directly back to
; for.body, so the header phi has two backedge entries.  The expected output
; for both the default and the tail-folding options contains a
; for.body.backedge block that merges those two branches, and otherwise
; keeps the loop in scalar form.
1028define i32 @multiple_latch2(i16* %p) {
1029; CHECK-LABEL: @multiple_latch2(
1030; CHECK-NEXT:  entry:
1031; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1032; CHECK:       for.body:
1033; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
1034; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
1035; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
1036; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
1037; CHECK:       for.body.backedge:
1038; CHECK-NEXT:    br label [[FOR_BODY]]
1039; CHECK:       for.second:
1040; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
1041; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
1042; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
1043; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
1044; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
1045; CHECK:       for.end:
1046; CHECK-NEXT:    ret i32 0
1047;
1048; TAILFOLD-LABEL: @multiple_latch2(
1049; TAILFOLD-NEXT:  entry:
1050; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
1051; TAILFOLD:       for.body:
1052; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
1053; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
1054; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
1055; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
1056; TAILFOLD:       for.body.backedge:
1057; TAILFOLD-NEXT:    br label [[FOR_BODY]]
1058; TAILFOLD:       for.second:
1059; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
1060; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
1061; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
1062; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
1063; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
1064; TAILFOLD:       for.end:
1065; TAILFOLD-NEXT:    ret i32 0
1066;
1067entry:
1068  br label %for.body
1069
1070for.body:
  ; One entry from the preheader plus one per back branch.
1071  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
1072  %inc = add nsw i32 %i.02, 1
1073  %cmp = icmp slt i32 %inc, 16
  ; First back branch: the header loops to itself while %inc < 16 (signed).
1074  br i1 %cmp, label %for.body, label %for.second
1075
1076for.second:
1077  %iprom = sext i32 %i.02 to i64
1078  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
1079  store i16 0, i16* %b, align 4
1080  %cmps = icmp sgt i32 %inc, 16
  ; Second back branch when %inc > 16 (signed); otherwise the loop exits.
1081  br i1 %cmps, label %for.body, label %for.end
1082
1083for.end:
1084  ret i32 0
1085}
1086
1087
1088; Check interaction between block predication and early exits.  We need the
1089; condition on the early exit to remain dead (i.e. not be used when forming
1090; the predicate mask).
; The loop exits from loop.header when %iv equals 200.  loop.body loads a
; float from %addr[%iv]; the store of 10.0 in the 'then' block runs only when
; that value does not compare ordered-equal to 0.0.
; With the default options the expected output is a <2 x float> vector loop
; in which each lane's store sits in its own pred.store.if block, guarded by
; a lane extracted from the negated fcmp result; the middle block always
; branches to the scalar loop.  With the tail-folding options the expected
; output keeps the loop in its original scalar form.
1091define void @scalar_predication(float* %addr) {
1092; CHECK-LABEL: @scalar_predication(
1093; CHECK-NEXT:  entry:
1094; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1095; CHECK:       vector.ph:
1096; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1097; CHECK:       vector.body:
1098; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
1099; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1100; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]]
1101; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
1102; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
1103; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
1104; CHECK-NEXT:    [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
1105; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
1106; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
1107; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
1108; CHECK:       pred.store.if:
1109; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP0]]
1110; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP7]], align 4
1111; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
1112; CHECK:       pred.store.continue:
1113; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
1114; CHECK-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
1115; CHECK:       pred.store.if1:
1116; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 1
1117; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP9]]
1118; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP10]], align 4
1119; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
1120; CHECK:       pred.store.continue2:
1121; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1122; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
1123; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1124; CHECK:       middle.block:
1125; CHECK-NEXT:    br label [[SCALAR_PH]]
1126; CHECK:       scalar.ph:
1127; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1128; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1129; CHECK:       loop.header:
1130; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1131; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]]
1132; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1133; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
1134; CHECK:       loop.body:
1135; CHECK-NEXT:    [[TMP12:%.*]] = load float, float* [[GEP]], align 4
1136; CHECK-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP12]], 0.000000e+00
1137; CHECK-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1138; CHECK:       then:
1139; CHECK-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1140; CHECK-NEXT:    br label [[LOOP_LATCH]]
1141; CHECK:       loop.latch:
1142; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1143; CHECK-NEXT:    br label [[LOOP_HEADER]], !llvm.loop [[LOOP21:![0-9]+]]
1144; CHECK:       exit:
1145; CHECK-NEXT:    ret void
1146;
1147; TAILFOLD-LABEL: @scalar_predication(
1148; TAILFOLD-NEXT:  entry:
1149; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1150; TAILFOLD:       loop.header:
1151; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1152; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]]
1153; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1154; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
1155; TAILFOLD:       loop.body:
1156; TAILFOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[GEP]], align 4
1157; TAILFOLD-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
1158; TAILFOLD-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1159; TAILFOLD:       then:
1160; TAILFOLD-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1161; TAILFOLD-NEXT:    br label [[LOOP_LATCH]]
1162; TAILFOLD:       loop.latch:
1163; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1164; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1165; TAILFOLD:       exit:
1166; TAILFOLD-NEXT:    ret void
1167;
1168entry:
1169  br label %loop.header
1170
1171loop.header:
1172  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1173  %gep = getelementptr float, float* %addr, i64 %iv
1174  %exitcond.not = icmp eq i64 %iv, 200
  ; Early exit from the header; this is the loop's only exit.
1175  br i1 %exitcond.not, label %exit, label %loop.body
1176
1177loop.body:
1178  %0 = load float, float* %gep, align 4
1179  %pred = fcmp oeq float %0, 0.0
  ; Skip the store when the loaded value compares ordered-equal to 0.0.
1180  br i1 %pred, label %loop.latch, label %then
1181
1182then:
  ; Conditionally executed store; this is what requires predication.
1183  store float 10.0, float* %gep, align 4
1184  br label %loop.latch
1185
1186loop.latch:
1187  %iv.next = add nuw nsw i64 %iv, 1
  ; Unconditional backedge.
1188  br label %loop.header
1189
1190exit:
1191  ret void
1192}
1193
; Integer add reduction in a loop with two exits into one shared exit block.
; loop.header exits when %iv equals 200 and loop.latch exits when %iv equals
; 400; the exit phi yields 0 on the header edge and %accum.next on the latch
; edge.  Each iteration adds the i32 loaded from %addr[%iv] to %accum.
; With the default options the expected output is a vector loop with a
; <2 x i32> accumulator that is reduced with llvm.vector.reduce.add in the
; middle block, which then always branches to the scalar loop.  With the
; tail-folding options the expected output keeps the loop in its original
; scalar form.
1194define i32 @me_reduction(i32* %addr) {
1195; CHECK-LABEL: @me_reduction(
1196; CHECK-NEXT:  entry:
1197; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1198; CHECK:       vector.ph:
1199; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1200; CHECK:       vector.body:
1201; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1202; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
1203; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1204; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]]
1205; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[TMP1]], i32 0
1206; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
1207; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
1208; CHECK-NEXT:    [[TMP4]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
1209; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1210; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
1211; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1212; CHECK:       middle.block:
1213; CHECK-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP4]])
1214; CHECK-NEXT:    br label [[SCALAR_PH]]
1215; CHECK:       scalar.ph:
1216; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1217; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
1218; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1219; CHECK:       loop.header:
1220; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1221; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1222; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]]
1223; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1224; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1225; CHECK:       loop.latch:
1226; CHECK-NEXT:    [[TMP7:%.*]] = load i32, i32* [[GEP]], align 4
1227; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP7]]
1228; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1229; CHECK-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1230; CHECK-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], !llvm.loop [[LOOP23:![0-9]+]]
1231; CHECK:       exit:
1232; CHECK-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1233; CHECK-NEXT:    ret i32 [[LCSSA]]
1234;
1235; TAILFOLD-LABEL: @me_reduction(
1236; TAILFOLD-NEXT:  entry:
1237; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1238; TAILFOLD:       loop.header:
1239; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1240; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1241; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1242; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1243; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1244; TAILFOLD:       loop.latch:
1245; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1246; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1247; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1248; TAILFOLD-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1249; TAILFOLD-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]]
1250; TAILFOLD:       exit:
1251; TAILFOLD-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1252; TAILFOLD-NEXT:    ret i32 [[LCSSA]]
1253;
1254entry:
1255  br label %loop.header
1256
1257loop.header:
1258  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1259  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1260  %gep = getelementptr i32, i32* %addr, i64 %iv
1261  %exitcond.not = icmp eq i64 %iv, 200
  ; First exit: from the header when %iv reaches 200.
1262  br i1 %exitcond.not, label %exit, label %loop.latch
1263
1264loop.latch:
1265  %0 = load i32, i32* %gep, align 4
1266  %accum.next = add i32 %accum, %0
1267  %iv.next = add nuw nsw i64 %iv, 1
1268  %exitcond2.not = icmp eq i64 %iv, 400
  ; Second exit: from the latch when %iv equals 400.
1269  br i1 %exitcond2.not, label %exit, label %loop.header
1270
1271exit:
  ; The header edge contributes a constant; the latch edge the running sum.
1272  %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch]
1273  ret i32 %lcssa
1274}
1275
1276; TODO: The current definition of reduction is too strict, we can vectorize
1277; this.  There's an analogous single exit case where we extract the N-1
1278; value of the reduction that we can also handle.  If we fix the latter, the
1279; multiple exit case probably falls out.
; The loop's only exit is from loop.header when %iv equals 200; the latch
; branches back unconditionally.  The value returned is %accum, the header
; phi of the running sum, rather than the updated value %accum.next.
; The expected output for both the default and the tail-folding options keeps
; the loop in its original scalar form.
1280define i32 @me_reduction2(i32* %addr) {
1281; CHECK-LABEL: @me_reduction2(
1282; CHECK-NEXT:  entry:
1283; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1284; CHECK:       loop.header:
1285; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1286; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1287; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1288; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1289; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1290; CHECK:       loop.latch:
1291; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1292; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1293; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1294; CHECK-NEXT:    br label [[LOOP_HEADER]]
1295; CHECK:       exit:
1296; CHECK-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1297; CHECK-NEXT:    ret i32 [[ACCUM_LCSSA]]
1298;
1299; TAILFOLD-LABEL: @me_reduction2(
1300; TAILFOLD-NEXT:  entry:
1301; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1302; TAILFOLD:       loop.header:
1303; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1304; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1305; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1306; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1307; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1308; TAILFOLD:       loop.latch:
1309; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1310; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1311; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1312; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1313; TAILFOLD:       exit:
1314; TAILFOLD-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1315; TAILFOLD-NEXT:    ret i32 [[ACCUM_LCSSA]]
1316;
1317entry:
1318  br label %loop.header
1319
1320loop.header:
1321  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1322  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1323  %gep = getelementptr i32, i32* %addr, i64 %iv
1324  %exitcond.not = icmp eq i64 %iv, 200
  ; Only exit of the loop; taken from the header, before this iteration's add.
1325  br i1 %exitcond.not, label %exit, label %loop.latch
1326
1327loop.latch:
1328  %0 = load i32, i32* %gep, align 4
1329  %accum.next = add i32 %accum, %0
1330  %iv.next = add nuw nsw i64 %iv, 1
  ; Unconditional backedge.
1331  br label %loop.header
1332
1333exit:
  ; Returns the header phi, not %accum.next.
1334  ret i32 %accum
1335}
1336
1337