1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
3; RUN: opt -S -loop-vectorize -force-vector-width=2  -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s
4
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6
; Bottom-tested loop: the single block for.cond is both header and latch, so
; the exit test runs after the store on every iteration.
; Expected output of the default invocation: vectorized at VF=2 with a scalar
; remainder loop.  Expected output of the predicate-dont-vectorize invocation:
; vectorized with the tail folded into predicated scalar stores, and
; middle.block branching unconditionally (br i1 true) to if.end.
7define void @bottom_tested(i16* %p, i32 %n) {
8; CHECK-LABEL: @bottom_tested(
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
11; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
12; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1
13; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
14; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
15; CHECK:       vector.ph:
16; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
17; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
18; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
19; CHECK:       vector.body:
20; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
21; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
22; CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[TMP2]] to i64
23; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP3]]
24; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0
25; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>*
26; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP6]], align 4
27; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
28; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
29; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
30; CHECK:       middle.block:
31; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
32; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
33; CHECK:       scalar.ph:
34; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
35; CHECK-NEXT:    br label [[FOR_COND:%.*]]
36; CHECK:       for.cond:
37; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
38; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
39; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
40; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
41; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
42; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
43; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]]
44; CHECK:       if.end:
45; CHECK-NEXT:    ret void
46;
47; TAILFOLD-LABEL: @bottom_tested(
48; TAILFOLD-NEXT:  entry:
49; TAILFOLD-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
50; TAILFOLD-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
51; TAILFOLD-NEXT:    [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1
52; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
53; TAILFOLD:       vector.ph:
54; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
55; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
56; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
57; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
58; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
59; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
60; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
61; TAILFOLD:       vector.body:
62; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
63; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
64; TAILFOLD-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
65; TAILFOLD-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 1
66; TAILFOLD-NEXT:    [[TMP4:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
67; TAILFOLD-NEXT:    [[TMP5:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
68; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
69; TAILFOLD-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
70; TAILFOLD:       pred.store.if:
71; TAILFOLD-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
72; TAILFOLD-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
73; TAILFOLD-NEXT:    store i16 0, i16* [[TMP8]], align 4
74; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
75; TAILFOLD:       pred.store.continue:
76; TAILFOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
77; TAILFOLD-NEXT:    br i1 [[TMP9]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
78; TAILFOLD:       pred.store.if1:
79; TAILFOLD-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
80; TAILFOLD-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP10]]
81; TAILFOLD-NEXT:    store i16 0, i16* [[TMP11]], align 4
82; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
83; TAILFOLD:       pred.store.continue2:
84; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
85; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
86; TAILFOLD-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
87; TAILFOLD-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
88; TAILFOLD:       middle.block:
89; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
90; TAILFOLD:       scalar.ph:
91; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
92; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
93; TAILFOLD:       for.cond:
94; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
95; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
96; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
97; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
98; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
99; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
100; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]]
101; TAILFOLD:       if.end:
102; TAILFOLD-NEXT:    ret void
103;
104entry:
105  br label %for.cond
106
107for.cond:
108  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
109  %iprom = sext i32 %i to i64
110  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
111  store i16 0, i16* %b, align 4
112  %inc = add nsw i32 %i, 1
; The exit test compares the pre-increment value %i, not %inc.
113  %cmp = icmp slt i32 %i, %n
114  br i1 %cmp, label %for.cond, label %if.end
115
116if.end:
117  ret void
118}
119
; Top-tested loop: the only exit is taken from the header for.cond, and the
; latch for.body branches back unconditionally.
; Expected output of the default invocation: vectorized at VF=2.  The vector
; trip count N_VEC subtracts a full 2 when the remainder is zero, so it is
; always smaller than the trip count and iterations remain for the scalar
; loop (the minimum-iterations test accordingly uses ule rather than ult).
; Expected output of the predicate-dont-vectorize invocation: the loop is
; left in its original scalar form.
120define void @early_exit(i16* %p, i32 %n) {
121; CHECK-LABEL: @early_exit(
122; CHECK-NEXT:  entry:
123; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
124; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
125; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1
126; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP1]], 2
127; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
128; CHECK:       vector.ph:
129; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
130; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
131; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 2, i32 [[N_MOD_VF]]
132; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[TMP3]]
133; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
134; CHECK:       vector.body:
135; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
136; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
137; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 0
138; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 1
139; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP4]] to i64
140; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]]
141; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP7]], i32 0
142; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <2 x i16>*
143; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP9]], align 4
144; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
145; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
146; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
147; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
148; CHECK:       middle.block:
149; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
150; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
151; CHECK:       scalar.ph:
152; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
153; CHECK-NEXT:    br label [[FOR_COND:%.*]]
154; CHECK:       for.cond:
155; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
156; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
157; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
158; CHECK:       for.body:
159; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
160; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
161; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
162; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
163; CHECK-NEXT:    br label [[FOR_COND]], [[LOOP5:!llvm.loop !.*]]
164; CHECK:       if.end:
165; CHECK-NEXT:    ret void
166;
167; TAILFOLD-LABEL: @early_exit(
168; TAILFOLD-NEXT:  entry:
169; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
170; TAILFOLD:       for.cond:
171; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
172; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
173; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
174; TAILFOLD:       for.body:
175; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
176; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
177; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
178; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
179; TAILFOLD-NEXT:    br label [[FOR_COND]]
180; TAILFOLD:       if.end:
181; TAILFOLD-NEXT:    ret void
182;
183entry:
184  br label %for.cond
185
186for.cond:
187  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; The exit test sits in the header, before the store in for.body.
188  %cmp = icmp slt i32 %i, %n
189  br i1 %cmp, label %for.body, label %if.end
190
191for.body:
192  %iprom = sext i32 %i to i64
193  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
194  store i16 0, i16* %b, align 4
195  %inc = add nsw i32 %i, 1
196  br label %for.cond
197
198if.end:
199  ret void
200}
201
202; Same loop as early_exit, but the function is marked optsize to prevent the
203; use of a scalar epilogue.  It cannot be vectorized in either case: both
; expected outputs below are the original scalar loop.
204define void @optsize(i16* %p, i32 %n) optsize {
205; CHECK-LABEL: @optsize(
206; CHECK-NEXT:  entry:
207; CHECK-NEXT:    br label [[FOR_COND:%.*]]
208; CHECK:       for.cond:
209; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
210; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
211; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
212; CHECK:       for.body:
213; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
214; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
215; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
216; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
217; CHECK-NEXT:    br label [[FOR_COND]]
218; CHECK:       if.end:
219; CHECK-NEXT:    ret void
220;
221; TAILFOLD-LABEL: @optsize(
222; TAILFOLD-NEXT:  entry:
223; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
224; TAILFOLD:       for.cond:
225; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
226; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
227; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
228; TAILFOLD:       for.body:
229; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
230; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
231; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
232; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
233; TAILFOLD-NEXT:    br label [[FOR_COND]]
234; TAILFOLD:       if.end:
235; TAILFOLD-NEXT:    ret void
236;
237entry:
238  br label %for.cond
239
240for.cond:
241  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
242  %cmp = icmp slt i32 %i, %n
243  br i1 %cmp, label %for.body, label %if.end
244
245for.body:
246  %iprom = sext i32 %i to i64
247  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
248  store i16 0, i16* %b, align 4
249  %inc = add nsw i32 %i, 1
250  br label %for.cond
251
252if.end:
253  ret void
254}
255
256
257; Multiple exits to one unique exit block (for.cond and for.body both branch
; to if.end), with no values defined inside the loop used outside it.
; Expected output of the default invocation: vectorized at VF=2 with a trip
; count of umin(smax(%n, 0), 2096) + 1 and a scalar remainder loop.
; Expected output of the predicate-dont-vectorize invocation: the loop is
; left in its original scalar form.
258define void @multiple_unique_exit(i16* %p, i32 %n) {
259; CHECK-LABEL: @multiple_unique_exit(
260; CHECK-NEXT:  entry:
261; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
262; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
263; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096
264; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096
265; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1
266; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2
267; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
268; CHECK:       vector.ph:
269; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
270; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
271; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]]
272; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]]
273; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
274; CHECK:       vector.body:
275; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
276; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
277; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 0
278; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
279; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP5]] to i64
280; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
281; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
282; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>*
283; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4
284; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
285; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
286; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
287; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
288; CHECK:       middle.block:
289; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
290; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
291; CHECK:       scalar.ph:
292; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
293; CHECK-NEXT:    br label [[FOR_COND:%.*]]
294; CHECK:       for.cond:
295; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
296; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
297; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
298; CHECK:       for.body:
299; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
300; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
301; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
302; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
303; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
304; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP7:!llvm.loop !.*]]
305; CHECK:       if.end:
306; CHECK-NEXT:    ret void
307;
308; TAILFOLD-LABEL: @multiple_unique_exit(
309; TAILFOLD-NEXT:  entry:
310; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
311; TAILFOLD:       for.cond:
312; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
313; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
314; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
315; TAILFOLD:       for.body:
316; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
317; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
318; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
319; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
320; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
321; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
322; TAILFOLD:       if.end:
323; TAILFOLD-NEXT:    ret void
324;
325entry:
326  br label %for.cond
327
328for.cond:
329  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
; First exit: leave from the header once %i reaches %n.
330  %cmp = icmp slt i32 %i, %n
331  br i1 %cmp, label %for.body, label %if.end
332
333for.body:
334  %iprom = sext i32 %i to i64
335  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
336  store i16 0, i16* %b, align 4
337  %inc = add nsw i32 %i, 1
; Second exit: leave from the latch once %i reaches the constant 2096.
338  %cmp2 = icmp slt i32 %i, 2096
339  br i1 %cmp2, label %for.cond, label %if.end
340
341if.end:
342  ret void
343}
344
345; Multiple exits to one exit block, with a loop value used outside the loop:
; the function returns %i, and the expected output carries it through an
; lcssa phi in if.end.  Neither expected output is vectorized.
346define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
347; CHECK-LABEL: @multiple_unique_exit2(
348; CHECK-NEXT:  entry:
349; CHECK-NEXT:    br label [[FOR_COND:%.*]]
350; CHECK:       for.cond:
351; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
352; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
353; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
354; CHECK:       for.body:
355; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
356; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
357; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
358; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
359; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
360; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
361; CHECK:       if.end:
362; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
363; CHECK-NEXT:    ret i32 [[I_LCSSA]]
364;
365; TAILFOLD-LABEL: @multiple_unique_exit2(
366; TAILFOLD-NEXT:  entry:
367; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
368; TAILFOLD:       for.cond:
369; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
370; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
371; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
372; TAILFOLD:       for.body:
373; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
374; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
375; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
376; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
377; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
378; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
379; TAILFOLD:       if.end:
380; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
381; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
382;
383entry:
384  br label %for.cond
385
386for.cond:
387  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
388  %cmp = icmp slt i32 %i, %n
389  br i1 %cmp, label %for.body, label %if.end
390
391for.body:
392  %iprom = sext i32 %i to i64
393  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
394  store i16 0, i16* %b, align 4
395  %inc = add nsw i32 %i, 1
396  %cmp2 = icmp slt i32 %i, 2096
397  br i1 %cmp2, label %for.cond, label %if.end
398
399if.end:
; %i is defined inside the loop and used here directly, with no phi in the input.
400  ret i32 %i
401}
402
403; Multiple exits to one exit block whose phi is not an lcssa phi: %exit
; merges the constants 0 (from for.cond) and 1 (from for.body) rather than a
; loop-defined value.  Neither expected output is vectorized.
404define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
405; CHECK-LABEL: @multiple_unique_exit3(
406; CHECK-NEXT:  entry:
407; CHECK-NEXT:    br label [[FOR_COND:%.*]]
408; CHECK:       for.cond:
409; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
410; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
411; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
412; CHECK:       for.body:
413; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
414; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
415; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
416; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
417; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
418; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
419; CHECK:       if.end:
420; CHECK-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
421; CHECK-NEXT:    ret i32 [[EXIT]]
422;
423; TAILFOLD-LABEL: @multiple_unique_exit3(
424; TAILFOLD-NEXT:  entry:
425; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
426; TAILFOLD:       for.cond:
427; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
428; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
429; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
430; TAILFOLD:       for.body:
431; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
432; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
433; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
434; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
435; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
436; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
437; TAILFOLD:       if.end:
438; TAILFOLD-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
439; TAILFOLD-NEXT:    ret i32 [[EXIT]]
440;
441entry:
442  br label %for.cond
443
444for.cond:
445  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
446  %cmp = icmp slt i32 %i, %n
447  br i1 %cmp, label %for.body, label %if.end
448
449for.body:
450  %iprom = sext i32 %i to i64
451  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
452  store i16 0, i16* %b, align 4
453  %inc = add nsw i32 %i, 1
454  %cmp2 = icmp slt i32 %i, 2096
455  br i1 %cmp2, label %for.cond, label %if.end
456
457if.end:
; The returned value records which of the two exits was taken.
458  %exit = phi i32 [0, %for.cond], [1, %for.body]
459  ret i32 %exit
460}
461
462; Multiple exits with distinct target blocks: for.cond exits to if.end
; (returns 0) and for.body exits to if.end2 (returns 1).  Neither expected
; output is vectorized.
463define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
464; CHECK-LABEL: @multiple_exit_blocks(
465; CHECK-NEXT:  entry:
466; CHECK-NEXT:    br label [[FOR_COND:%.*]]
467; CHECK:       for.cond:
468; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
469; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
470; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
471; CHECK:       for.body:
472; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
473; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
474; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
475; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
476; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
477; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
478; CHECK:       if.end:
479; CHECK-NEXT:    ret i32 0
480; CHECK:       if.end2:
481; CHECK-NEXT:    ret i32 1
482;
483; TAILFOLD-LABEL: @multiple_exit_blocks(
484; TAILFOLD-NEXT:  entry:
485; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
486; TAILFOLD:       for.cond:
487; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
488; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
489; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
490; TAILFOLD:       for.body:
491; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
492; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
493; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
494; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
495; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
496; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
497; TAILFOLD:       if.end:
498; TAILFOLD-NEXT:    ret i32 0
499; TAILFOLD:       if.end2:
500; TAILFOLD-NEXT:    ret i32 1
501;
502entry:
503  br label %for.cond
504
505for.cond:
506  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
507  %cmp = icmp slt i32 %i, %n
508  br i1 %cmp, label %for.body, label %if.end
509
510for.body:
511  %iprom = sext i32 %i to i64
512  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
513  store i16 0, i16* %b, align 4
514  %inc = add nsw i32 %i, 1
515  %cmp2 = icmp slt i32 %i, 2096
; Unlike the header exit, this exit targets if.end2 rather than if.end.
516  br i1 %cmp2, label %for.cond, label %if.end2
517
518if.end:
519  ret i32 0
520
521if.end2:
522  ret i32 1
523}
524
525; Unique exit case, but with a switch: two edges between the same pair of
526; blocks (for.cond to if.end, for cases 2096 and 2097) is an often missed
; edge case.  Neither expected output is vectorized.
527define i32 @multiple_exit_switch(i16* %p, i32 %n) {
528; CHECK-LABEL: @multiple_exit_switch(
529; CHECK-NEXT:  entry:
530; CHECK-NEXT:    br label [[FOR_COND:%.*]]
531; CHECK:       for.cond:
532; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
533; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
534; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
535; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
536; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
537; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
538; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
539; CHECK-NEXT:    i32 2097, label [[IF_END]]
540; CHECK-NEXT:    ]
541; CHECK:       if.end:
542; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
543; CHECK-NEXT:    ret i32 [[I_LCSSA]]
544;
545; TAILFOLD-LABEL: @multiple_exit_switch(
546; TAILFOLD-NEXT:  entry:
547; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
548; TAILFOLD:       for.cond:
549; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
550; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
551; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
552; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
553; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
554; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
555; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
556; TAILFOLD-NEXT:    i32 2097, label [[IF_END]]
557; TAILFOLD-NEXT:    ]
558; TAILFOLD:       if.end:
559; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
560; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
561;
562entry:
563  br label %for.cond
564
565for.cond:
566  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
567  %iprom = sext i32 %i to i64
568  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
569  store i16 0, i16* %b, align 4
570  %inc = add nsw i32 %i, 1
; The default destination is the backedge; both listed cases leave the loop
; for the same block.
571  switch i32 %i, label %for.cond [
572  i32 2096, label %if.end
573  i32 2097, label %if.end
574  ]
575
576if.end:
577  ret i32 %i
578}
579
580; Multiple exit case, but with a switch: multiple exiting edges from a
581; single block (for.cond to if.end and for.cond to if.end2) is a commonly
; missed edge case.  Neither expected output is vectorized.
582define i32 @multiple_exit_switch2(i16* %p, i32 %n) {
583; CHECK-LABEL: @multiple_exit_switch2(
584; CHECK-NEXT:  entry:
585; CHECK-NEXT:    br label [[FOR_COND:%.*]]
586; CHECK:       for.cond:
587; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
588; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
589; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
590; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
591; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
592; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
593; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
594; CHECK-NEXT:    i32 2097, label [[IF_END2:%.*]]
595; CHECK-NEXT:    ]
596; CHECK:       if.end:
597; CHECK-NEXT:    ret i32 0
598; CHECK:       if.end2:
599; CHECK-NEXT:    ret i32 1
600;
601; TAILFOLD-LABEL: @multiple_exit_switch2(
602; TAILFOLD-NEXT:  entry:
603; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
604; TAILFOLD:       for.cond:
605; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
606; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
607; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
608; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
609; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
610; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
611; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
612; TAILFOLD-NEXT:    i32 2097, label [[IF_END2:%.*]]
613; TAILFOLD-NEXT:    ]
614; TAILFOLD:       if.end:
615; TAILFOLD-NEXT:    ret i32 0
616; TAILFOLD:       if.end2:
617; TAILFOLD-NEXT:    ret i32 1
618;
619entry:
620  br label %for.cond
621
622for.cond:
623  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
624  %iprom = sext i32 %i to i64
625  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
626  store i16 0, i16* %b, align 4
627  %inc = add nsw i32 %i, 1
; The default destination is the backedge; each listed case leaves the loop
; for a different block.
628  switch i32 %i, label %for.cond [
629  i32 2096, label %if.end
630  i32 2097, label %if.end2
631  ]
632
633if.end:
634  ret i32 0
635
636if.end2:
637  ret i32 1
638}
639
; Loop with two blocks that can continue iterating: for.body and for.second
; both branch to the shared block for.body.backedge, which jumps back to the
; header.  The only exit is from for.second to for.end.  Neither expected
; output is vectorized.
640define i32 @multiple_latch1(i16* %p) {
641; CHECK-LABEL: @multiple_latch1(
642; CHECK-NEXT:  entry:
643; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
644; CHECK:       for.body:
645; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
646; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
647; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
648; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
649; CHECK:       for.second:
650; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
651; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
652; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
653; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
654; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
655; CHECK:       for.body.backedge:
656; CHECK-NEXT:    br label [[FOR_BODY]]
657; CHECK:       for.end:
658; CHECK-NEXT:    ret i32 0
659;
660; TAILFOLD-LABEL: @multiple_latch1(
661; TAILFOLD-NEXT:  entry:
662; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
663; TAILFOLD:       for.body:
664; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
665; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
666; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
667; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
668; TAILFOLD:       for.second:
669; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
670; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
671; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
672; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
673; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
674; TAILFOLD:       for.body.backedge:
675; TAILFOLD-NEXT:    br label [[FOR_BODY]]
676; TAILFOLD:       for.end:
677; TAILFOLD-NEXT:    ret i32 0
678;
679entry:
680  br label %for.body
681
682for.body:
683  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge]
684  %inc = add nsw i32 %i.02, 1
685  %cmp = icmp slt i32 %inc, 16
; First path to the backedge: taken while %inc is below 16.
686  br i1 %cmp, label %for.body.backedge, label %for.second
687
688for.second:
689  %iprom = sext i32 %i.02 to i64
690  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
691  store i16 0, i16* %b, align 4
692  %cmps = icmp sgt i32 %inc, 16
; Second path to the backedge: taken when %inc is above 16; otherwise exit.
693  br i1 %cmps, label %for.body.backedge, label %for.end
694
695for.body.backedge:
696  br label %for.body
697
698for.end:
699  ret i32 0
700}
701
702
703; Two back branches - loop simplify will convert this to the same form
704; as the previous test before the vectorizer sees it, but show that anyway.
705define i32 @multiple_latch2(i16* %p) {
706; CHECK-LABEL: @multiple_latch2(
707; CHECK-NEXT:  entry:
708; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
709; CHECK:       for.body:
710; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
711; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
712; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
713; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
714; CHECK:       for.body.backedge:
715; CHECK-NEXT:    br label [[FOR_BODY]]
716; CHECK:       for.second:
717; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
718; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
719; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
720; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
721; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
722; CHECK:       for.end:
723; CHECK-NEXT:    ret i32 0
724;
725; TAILFOLD-LABEL: @multiple_latch2(
726; TAILFOLD-NEXT:  entry:
727; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
728; TAILFOLD:       for.body:
729; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
730; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
731; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
732; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
733; TAILFOLD:       for.body.backedge:
734; TAILFOLD-NEXT:    br label [[FOR_BODY]]
735; TAILFOLD:       for.second:
736; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
737; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
738; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
739; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
740; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
741; TAILFOLD:       for.end:
742; TAILFOLD-NEXT:    ret i32 0
743;
; Input shape: the header for.body is targeted by two direct back edges (its
; own terminator and the terminator of for.second), so the header phi has
; three incoming values.  The expected output above instead contains a
; for.body.backedge block and a two-entry phi, and lists no vector.ph /
; vector.body blocks for either opt invocation.
744entry:
745  br label %for.body
746
; Loop header; branches straight back to itself while %inc < 16 (signed).
747for.body:
748  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
749  %inc = add nsw i32 %i.02, 1
750  %cmp = icmp slt i32 %inc, 16
751  br i1 %cmp, label %for.body, label %for.second
752
; Stores 0 to %p[%i.02]; second back edge to the header when %inc > 16,
; otherwise the loop is left.
753for.second:
754  %iprom = sext i32 %i.02 to i64
755  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
756  store i16 0, i16* %b, align 4
757  %cmps = icmp sgt i32 %inc, 16
758  br i1 %cmps, label %for.body, label %for.end
759
760for.end:
761  ret i32 0
762}
763
764
765; Check interaction between block predication and early exits.  We need the
766; condition on the early exit to remain dead (i.e. not be used when forming
767; the predicate mask).
768define void @scalar_predication(float* %addr) {
769; CHECK-LABEL: @scalar_predication(
770; CHECK-NEXT:  entry:
771; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
772; CHECK:       vector.ph:
773; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
774; CHECK:       vector.body:
775; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
776; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
777; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
778; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]]
779; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
780; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
781; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
782; CHECK-NEXT:    [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
783; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
784; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
785; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
786; CHECK:       pred.store.if:
787; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP1]], align 4
788; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
789; CHECK:       pred.store.continue:
790; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
791; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
792; CHECK:       pred.store.if1:
793; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
794; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP8]]
795; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP9]], align 4
796; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
797; CHECK:       pred.store.continue2:
798; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
799; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
800; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
801; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
802; CHECK:       middle.block:
803; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 201, 200
804; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
805; CHECK:       scalar.ph:
806; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
807; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
808; CHECK:       loop.header:
809; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
810; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]]
811; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
812; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_BODY:%.*]]
813; CHECK:       loop.body:
814; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[GEP]], align 4
815; CHECK-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00
816; CHECK-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
817; CHECK:       then:
818; CHECK-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
819; CHECK-NEXT:    br label [[LOOP_LATCH]]
820; CHECK:       loop.latch:
821; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
822; CHECK-NEXT:    br label [[LOOP_HEADER]], [[LOOP9:!llvm.loop !.*]]
823; CHECK:       exit:
824; CHECK-NEXT:    ret void
825;
826; TAILFOLD-LABEL: @scalar_predication(
827; TAILFOLD-NEXT:  entry:
828; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
829; TAILFOLD:       loop.header:
830; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
831; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]]
832; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
833; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
834; TAILFOLD:       loop.body:
835; TAILFOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[GEP]], align 4
836; TAILFOLD-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
837; TAILFOLD-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
838; TAILFOLD:       then:
839; TAILFOLD-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
840; TAILFOLD-NEXT:    br label [[LOOP_LATCH]]
841; TAILFOLD:       loop.latch:
842; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
843; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
844; TAILFOLD:       exit:
845; TAILFOLD-NEXT:    ret void
846;
; Summary of the expectations above:
;  * Default invocation: a two-lane vector loop whose store is predicated per
;    lane (pred.store.if / pred.store.if1).  The lane mask is just the
;    inverted fcmp result; the header's exit compare does not feed it.  The
;    vector loop stops at index 200 and middle.block compares the constants
;    201 and 200, so control always continues into the scalar loop.
;  * Invocation with predicate-dont-vectorize: only the original scalar
;    blocks are listed.
847entry:
848  br label %loop.header
849
; The loop's only exit test sits in the header (iv == 200); the latch
; branches back unconditionally.
850loop.header:
851  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
852  %gep = getelementptr float, float* %addr, i64 %iv
853  %exitcond.not = icmp eq i64 %iv, 200
854  br i1 %exitcond.not, label %exit, label %loop.body
855
; Skip the store when the loaded element compares (ordered) equal to 0.0.
856loop.body:
857  %0 = load float, float* %gep, align 4
858  %pred = fcmp oeq float %0, 0.0
859  br i1 %pred, label %loop.latch, label %then
860
; Conditionally executed store of 10.0 to the element just loaded.
861then:
862  store float 10.0, float* %gep, align 4
863  br label %loop.latch
864
865loop.latch:
866  %iv.next = add nuw nsw i64 %iv, 1
867  br label %loop.header
868
869exit:
870  ret void
871}
872