1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
3; RUN: opt -S -loop-vectorize -force-vector-width=2  -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s
4
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6
; Single-block loop with the exit test at the bottom: for.cond stores i16 0 to
; %p[%i], computes %inc, and branches back to itself while %i < %n (signed).
; Default run (forced width 2): the expected output has a <2 x i16> store in
; vector.body, a minimum-iteration check using ult, and the scalar loop kept
; as a remainder after scalar.ph.
; Tail-folding run: the expected output stores lane by lane under a mask
; (pred.store.if / pred.store.if1) and middle.block branches to if.end on a
; constant true condition.
7define void @bottom_tested(i16* %p, i32 %n) {
8; CHECK-LABEL: @bottom_tested(
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
11; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
12; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1
13; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
14; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
15; CHECK:       vector.ph:
16; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
17; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
18; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
19; CHECK:       vector.body:
20; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
21; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
22; CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[TMP2]] to i64
23; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP3]]
24; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0
25; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>*
26; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP6]], align 4
27; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
28; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
29; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
30; CHECK:       middle.block:
31; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
32; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
33; CHECK:       scalar.ph:
34; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
35; CHECK-NEXT:    br label [[FOR_COND:%.*]]
36; CHECK:       for.cond:
37; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
38; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
39; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
40; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
41; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
42; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
43; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]]
44; CHECK:       if.end:
45; CHECK-NEXT:    ret void
46;
47; TAILFOLD-LABEL: @bottom_tested(
48; TAILFOLD-NEXT:  entry:
49; TAILFOLD-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
50; TAILFOLD-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
51; TAILFOLD-NEXT:    [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1
52; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
53; TAILFOLD:       vector.ph:
54; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
55; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
56; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
57; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
58; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
59; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
60; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
61; TAILFOLD:       vector.body:
62; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
63; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
64; TAILFOLD-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
65; TAILFOLD-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 1
66; TAILFOLD-NEXT:    [[TMP4:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
67; TAILFOLD-NEXT:    [[TMP5:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
68; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
69; TAILFOLD-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
70; TAILFOLD:       pred.store.if:
71; TAILFOLD-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
72; TAILFOLD-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
73; TAILFOLD-NEXT:    store i16 0, i16* [[TMP8]], align 4
74; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
75; TAILFOLD:       pred.store.continue:
76; TAILFOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
77; TAILFOLD-NEXT:    br i1 [[TMP9]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
78; TAILFOLD:       pred.store.if1:
79; TAILFOLD-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
80; TAILFOLD-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP10]]
81; TAILFOLD-NEXT:    store i16 0, i16* [[TMP11]], align 4
82; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
83; TAILFOLD:       pred.store.continue2:
84; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
85; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
86; TAILFOLD-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
87; TAILFOLD-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
88; TAILFOLD:       middle.block:
89; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
90; TAILFOLD:       scalar.ph:
91; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
92; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
93; TAILFOLD:       for.cond:
94; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
95; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
96; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
97; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
98; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
99; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
100; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]]
101; TAILFOLD:       if.end:
102; TAILFOLD-NEXT:    ret void
103;
104entry:
105  br label %for.cond
106
; for.cond is both the loop header and the only exiting block.
107for.cond:
108  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
109  %iprom = sext i32 %i to i64
110  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
111  store i16 0, i16* %b, align 4
112  %inc = add nsw i32 %i, 1
; The exit compare reads %i (the value before the increment), not %inc.
113  %cmp = icmp slt i32 %i, %n
114  br i1 %cmp, label %for.cond, label %if.end
115
116if.end:
117  ret void
118}
119
; Top-tested loop: for.cond compares %i < %n (signed) and leaves for if.end
; before for.body performs the store; for.body branches back unconditionally.
; Default run (forced width 2): vectorized. The minimum-iteration check uses
; ule, and vector.ph substitutes 2 for a zero remainder, so the amount
; subtracted from the trip count to form n.vec is never zero.
; Tail-folding run: the expected output contains only the original scalar
; loop blocks.
120define void @early_exit(i16* %p, i32 %n) {
121; CHECK-LABEL: @early_exit(
122; CHECK-NEXT:  entry:
123; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
124; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
125; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1
126; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP1]], 2
127; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
128; CHECK:       vector.ph:
129; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
130; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
131; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 2, i32 [[N_MOD_VF]]
132; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[TMP3]]
133; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
134; CHECK:       vector.body:
135; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
136; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
137; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 0
138; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 1
139; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP4]] to i64
140; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]]
141; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP7]], i32 0
142; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <2 x i16>*
143; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP9]], align 4
144; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
145; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
146; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
147; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
148; CHECK:       middle.block:
149; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
150; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
151; CHECK:       scalar.ph:
152; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
153; CHECK-NEXT:    br label [[FOR_COND:%.*]]
154; CHECK:       for.cond:
155; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
156; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
157; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
158; CHECK:       for.body:
159; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
160; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
161; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
162; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
163; CHECK-NEXT:    br label [[FOR_COND]], [[LOOP5:!llvm.loop !.*]]
164; CHECK:       if.end:
165; CHECK-NEXT:    ret void
166;
167; TAILFOLD-LABEL: @early_exit(
168; TAILFOLD-NEXT:  entry:
169; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
170; TAILFOLD:       for.cond:
171; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
172; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
173; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
174; TAILFOLD:       for.body:
175; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
176; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
177; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
178; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
179; TAILFOLD-NEXT:    br label [[FOR_COND]]
180; TAILFOLD:       if.end:
181; TAILFOLD-NEXT:    ret void
182;
183entry:
184  br label %for.cond
185
; Loop header: the only exiting block; it exits before any store happens.
186for.cond:
187  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
188  %cmp = icmp slt i32 %i, %n
189  br i1 %cmp, label %for.body, label %if.end
190
; Latch: stores i16 0 to %p[%i] and increments; it never exits the loop.
191for.body:
192  %iprom = sext i32 %i to i64
193  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
194  store i16 0, i16* %b, align 4
195  %inc = add nsw i32 %i, 1
196  br label %for.cond
197
198if.end:
199  ret void
200}
201
202; Same as early_exit, but with optsize to prevent the use of
203; a scalar epilogue.  -- Can't vectorize this in either case.
; The loop body is the same top-tested store loop used in @early_exit; the
; optsize function attribute is the only difference in the input.
; Both runs expect the output to contain only the original scalar loop blocks
; (entry, for.cond, for.body, if.end).
204define void @optsize(i16* %p, i32 %n) optsize {
205; CHECK-LABEL: @optsize(
206; CHECK-NEXT:  entry:
207; CHECK-NEXT:    br label [[FOR_COND:%.*]]
208; CHECK:       for.cond:
209; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
210; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
211; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
212; CHECK:       for.body:
213; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
214; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
215; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
216; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
217; CHECK-NEXT:    br label [[FOR_COND]]
218; CHECK:       if.end:
219; CHECK-NEXT:    ret void
220;
221; TAILFOLD-LABEL: @optsize(
222; TAILFOLD-NEXT:  entry:
223; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
224; TAILFOLD:       for.cond:
225; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
226; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
227; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
228; TAILFOLD:       for.body:
229; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
230; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
231; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
232; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
233; TAILFOLD-NEXT:    br label [[FOR_COND]]
234; TAILFOLD:       if.end:
235; TAILFOLD-NEXT:    ret void
236;
237entry:
238  br label %for.cond
239
; Loop header and only exiting block.
240for.cond:
241  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
242  %cmp = icmp slt i32 %i, %n
243  br i1 %cmp, label %for.body, label %if.end
244
; Latch: performs the store and the increment, then loops back unconditionally.
245for.body:
246  %iprom = sext i32 %i to i64
247  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
248  store i16 0, i16* %b, align 4
249  %inc = add nsw i32 %i, 1
250  br label %for.cond
251
252if.end:
253  ret void
254}
255
256
257; multiple exit - no values inside the loop used outside
; Two exiting branches target the same exit block if.end: for.cond exits when
; %i < %n is false, and for.body exits when %i < 2096 is false. The function
; returns void.
; Default run (forced width 2): vectorized. The entry block clamps %n with a
; signed max against 0 and an unsigned min against 2096, then adds 1, before
; the ule minimum-iteration check.
; Tail-folding run: the expected output contains only the original scalar
; loop blocks.
258define void @multiple_unique_exit(i16* %p, i32 %n) {
259; CHECK-LABEL: @multiple_unique_exit(
260; CHECK-NEXT:  entry:
261; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
262; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
263; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096
264; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096
265; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1
266; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2
267; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
268; CHECK:       vector.ph:
269; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
270; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
271; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]]
272; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]]
273; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
274; CHECK:       vector.body:
275; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
276; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
277; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 0
278; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
279; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP5]] to i64
280; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
281; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
282; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>*
283; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4
284; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
285; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
286; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
287; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
288; CHECK:       middle.block:
289; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
290; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
291; CHECK:       scalar.ph:
292; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
293; CHECK-NEXT:    br label [[FOR_COND:%.*]]
294; CHECK:       for.cond:
295; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
296; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
297; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
298; CHECK:       for.body:
299; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
300; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
301; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
302; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
303; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
304; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP7:!llvm.loop !.*]]
305; CHECK:       if.end:
306; CHECK-NEXT:    ret void
307;
308; TAILFOLD-LABEL: @multiple_unique_exit(
309; TAILFOLD-NEXT:  entry:
310; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
311; TAILFOLD:       for.cond:
312; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
313; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
314; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
315; TAILFOLD:       for.body:
316; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
317; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
318; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
319; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
320; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
321; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
322; TAILFOLD:       if.end:
323; TAILFOLD-NEXT:    ret void
324;
325entry:
326  br label %for.cond
327
; First exit: taken when %i < %n is false.
328for.cond:
329  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
330  %cmp = icmp slt i32 %i, %n
331  br i1 %cmp, label %for.body, label %if.end
332
; Second exit: taken when %i < 2096 is false (compare uses %i, not %inc).
333for.body:
334  %iprom = sext i32 %i to i64
335  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
336  store i16 0, i16* %b, align 4
337  %inc = add nsw i32 %i, 1
338  %cmp2 = icmp slt i32 %i, 2096
339  br i1 %cmp2, label %for.cond, label %if.end
340
341if.end:
342  ret void
343}
344
345; multiple exit - with an lcssa phi
; Two exiting branches (for.cond on %i < %n, for.body on %i < 2096) share the
; exit block if.end, which returns %i, a value defined inside the loop.
; Default run (forced width 2): vectorized. middle.block computes n.vec - 1
; twice (the ind.escape values) and the phi in if.end receives the second one
; on the edge from middle.block.
; Tail-folding run: only the original scalar blocks are expected, with a
; two-input phi of %i in if.end feeding the return.
346define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
347; CHECK-LABEL: @multiple_unique_exit2(
348; CHECK-NEXT:  entry:
349; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
350; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
351; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096
352; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096
353; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1
354; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2
355; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
356; CHECK:       vector.ph:
357; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
358; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
359; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]]
360; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]]
361; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
362; CHECK:       vector.body:
363; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
364; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
365; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 0
366; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
367; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP5]] to i64
368; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
369; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
370; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>*
371; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4
372; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
373; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
374; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
375; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
376; CHECK:       middle.block:
377; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
378; CHECK-NEXT:    [[IND_ESCAPE:%.*]] = sub i32 [[N_VEC]], 1
379; CHECK-NEXT:    [[IND_ESCAPE1:%.*]] = sub i32 [[N_VEC]], 1
380; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
381; CHECK:       scalar.ph:
382; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
383; CHECK-NEXT:    br label [[FOR_COND:%.*]]
384; CHECK:       for.cond:
385; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
386; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
387; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
388; CHECK:       for.body:
389; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
390; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
391; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
392; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
393; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
394; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP9:!llvm.loop !.*]]
395; CHECK:       if.end:
396; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ], [ [[IND_ESCAPE1]], [[MIDDLE_BLOCK]] ]
397; CHECK-NEXT:    ret i32 [[I_LCSSA]]
398;
399; TAILFOLD-LABEL: @multiple_unique_exit2(
400; TAILFOLD-NEXT:  entry:
401; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
402; TAILFOLD:       for.cond:
403; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
404; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
405; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
406; TAILFOLD:       for.body:
407; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
408; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
409; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
410; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
411; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
412; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
413; TAILFOLD:       if.end:
414; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
415; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
416;
417entry:
418  br label %for.cond
419
; First exit: taken when %i < %n is false.
420for.cond:
421  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
422  %cmp = icmp slt i32 %i, %n
423  br i1 %cmp, label %for.body, label %if.end
424
; Second exit: taken when %i < 2096 is false.
425for.body:
426  %iprom = sext i32 %i to i64
427  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
428  store i16 0, i16* %b, align 4
429  %inc = add nsw i32 %i, 1
430  %cmp2 = icmp slt i32 %i, 2096
431  br i1 %cmp2, label %for.cond, label %if.end
432
; %i is used directly outside the loop here; the input has no phi in if.end.
433if.end:
434  ret i32 %i
435}
436
437; multiple exit w/a non lcssa phi
; Two exiting branches (for.cond on %i < %n, for.body on %i < 2096) share the
; exit block if.end. if.end holds a phi of constants that records which exit
; was taken (0 from for.cond, 1 from for.body) and returns it.
; Default run (forced width 2): vectorized; the phi in if.end gains a third
; incoming value, 0, on the edge from middle.block.
; Tail-folding run: only the original scalar blocks are expected and the phi
; keeps its two original inputs.
438define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
439; CHECK-LABEL: @multiple_unique_exit3(
440; CHECK-NEXT:  entry:
441; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
442; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
443; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096
444; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096
445; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1
446; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2
447; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
448; CHECK:       vector.ph:
449; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
450; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
451; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]]
452; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]]
453; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
454; CHECK:       vector.body:
455; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
456; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
457; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 0
458; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
459; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP5]] to i64
460; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
461; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
462; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>*
463; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4
464; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
465; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
466; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
467; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
468; CHECK:       middle.block:
469; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP2]], [[N_VEC]]
470; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
471; CHECK:       scalar.ph:
472; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
473; CHECK-NEXT:    br label [[FOR_COND:%.*]]
474; CHECK:       for.cond:
475; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
476; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
477; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END]]
478; CHECK:       for.body:
479; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
480; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
481; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
482; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
483; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
484; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP11:!llvm.loop !.*]]
485; CHECK:       if.end:
486; CHECK-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ], [ 0, [[MIDDLE_BLOCK]] ]
487; CHECK-NEXT:    ret i32 [[EXIT]]
488;
489; TAILFOLD-LABEL: @multiple_unique_exit3(
490; TAILFOLD-NEXT:  entry:
491; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
492; TAILFOLD:       for.cond:
493; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
494; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
495; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
496; TAILFOLD:       for.body:
497; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
498; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
499; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
500; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
501; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
502; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
503; TAILFOLD:       if.end:
504; TAILFOLD-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
505; TAILFOLD-NEXT:    ret i32 [[EXIT]]
506;
507entry:
508  br label %for.cond
509
; First exit: taken when %i < %n is false; contributes 0 to %exit.
510for.cond:
511  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
512  %cmp = icmp slt i32 %i, %n
513  br i1 %cmp, label %for.body, label %if.end
514
; Second exit: taken when %i < 2096 is false; contributes 1 to %exit.
515for.body:
516  %iprom = sext i32 %i to i64
517  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
518  store i16 0, i16* %b, align 4
519  %inc = add nsw i32 %i, 1
520  %cmp2 = icmp slt i32 %i, 2096
521  br i1 %cmp2, label %for.cond, label %if.end
522
523if.end:
524  %exit = phi i32 [0, %for.cond], [1, %for.body]
525  ret i32 %exit
526}
527
528; multiple exits w/distinct target blocks
; The two exiting branches target different blocks: for.cond exits to if.end
; (returns 0) and for.body exits to if.end2 (returns 1).
; Both runs expect the output to contain only the original blocks, with no
; vector blocks.
529define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
530; CHECK-LABEL: @multiple_exit_blocks(
531; CHECK-NEXT:  entry:
532; CHECK-NEXT:    br label [[FOR_COND:%.*]]
533; CHECK:       for.cond:
534; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
535; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
536; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
537; CHECK:       for.body:
538; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
539; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
540; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
541; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
542; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
543; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
544; CHECK:       if.end:
545; CHECK-NEXT:    ret i32 0
546; CHECK:       if.end2:
547; CHECK-NEXT:    ret i32 1
548;
549; TAILFOLD-LABEL: @multiple_exit_blocks(
550; TAILFOLD-NEXT:  entry:
551; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
552; TAILFOLD:       for.cond:
553; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
554; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
555; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
556; TAILFOLD:       for.body:
557; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
558; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
559; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
560; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
561; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
562; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
563; TAILFOLD:       if.end:
564; TAILFOLD-NEXT:    ret i32 0
565; TAILFOLD:       if.end2:
566; TAILFOLD-NEXT:    ret i32 1
567;
568entry:
569  br label %for.cond
570
; First exit: leaves for if.end when %i < %n is false.
571for.cond:
572  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
573  %cmp = icmp slt i32 %i, %n
574  br i1 %cmp, label %for.body, label %if.end
575
; Second exit: leaves for if.end2 when %i < 2096 is false.
576for.body:
577  %iprom = sext i32 %i to i64
578  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
579  store i16 0, i16* %b, align 4
580  %inc = add nsw i32 %i, 1
581  %cmp2 = icmp slt i32 %i, 2096
582  br i1 %cmp2, label %for.cond, label %if.end2
583
584if.end:
585  ret i32 0
586
587if.end2:
588  ret i32 1
589}
590
591; LCSSA, common value each exit
; The two exiting branches target different blocks (for.cond -> if.end,
; for.body -> if.end2), and each exit block returns the same loop-defined
; value %i.
; Both runs expect only the original loop blocks, with a single-input phi of
; %i in each exit block feeding its return.
592define i32 @multiple_exit_blocks2(i16* %p, i32 %n) {
593; CHECK-LABEL: @multiple_exit_blocks2(
594; CHECK-NEXT:  entry:
595; CHECK-NEXT:    br label [[FOR_COND:%.*]]
596; CHECK:       for.cond:
597; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
598; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
599; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
600; CHECK:       for.body:
601; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
602; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
603; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
604; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
605; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
606; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
607; CHECK:       if.end:
608; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
609; CHECK-NEXT:    ret i32 [[I_LCSSA]]
610; CHECK:       if.end2:
611; CHECK-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
612; CHECK-NEXT:    ret i32 [[I_LCSSA1]]
613;
614; TAILFOLD-LABEL: @multiple_exit_blocks2(
615; TAILFOLD-NEXT:  entry:
616; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
617; TAILFOLD:       for.cond:
618; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
619; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
620; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
621; TAILFOLD:       for.body:
622; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
623; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
624; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
625; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
626; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
627; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
628; TAILFOLD:       if.end:
629; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
630; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
631; TAILFOLD:       if.end2:
632; TAILFOLD-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
633; TAILFOLD-NEXT:    ret i32 [[I_LCSSA1]]
634;
635entry:
636  br label %for.cond
637
; First exit: leaves for if.end when %i < %n is false.
638for.cond:
639  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
640  %cmp = icmp slt i32 %i, %n
641  br i1 %cmp, label %for.body, label %if.end
642
; Second exit: leaves for if.end2 when %i < 2096 is false.
643for.body:
644  %iprom = sext i32 %i to i64
645  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
646  store i16 0, i16* %b, align 4
647  %inc = add nsw i32 %i, 1
648  %cmp2 = icmp slt i32 %i, 2096
649  br i1 %cmp2, label %for.cond, label %if.end2
650
; Both exit blocks use %i directly; the input has no phis here.
651if.end:
652  ret i32 %i
653
654if.end2:
655  ret i32 %i
656}
657
658; LCSSA, distinct value each exit
; Same loop shape as a header exit to if.end plus a latch exit to if.end2, but
; the two exit blocks return different in-loop values: if.end returns %i and
; if.end2 returns %inc.  The expected output for both RUN lines keeps the
; scalar loop, with an LCSSA phi of %i in if.end and one of %inc in if.end2.
659define i32 @multiple_exit_blocks3(i16* %p, i32 %n) {
660; CHECK-LABEL: @multiple_exit_blocks3(
661; CHECK-NEXT:  entry:
662; CHECK-NEXT:    br label [[FOR_COND:%.*]]
663; CHECK:       for.cond:
664; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
665; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
666; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
667; CHECK:       for.body:
668; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
669; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
670; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
671; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
672; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
673; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
674; CHECK:       if.end:
675; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
676; CHECK-NEXT:    ret i32 [[I_LCSSA]]
677; CHECK:       if.end2:
678; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
679; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
680;
681; TAILFOLD-LABEL: @multiple_exit_blocks3(
682; TAILFOLD-NEXT:  entry:
683; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
684; TAILFOLD:       for.cond:
685; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
686; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
687; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
688; TAILFOLD:       for.body:
689; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
690; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
691; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
692; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
693; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
694; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
695; TAILFOLD:       if.end:
696; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
697; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
698; TAILFOLD:       if.end2:
699; TAILFOLD-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
700; TAILFOLD-NEXT:    ret i32 [[INC_LCSSA]]
701;
702entry:
703  br label %for.cond
704
705for.cond:
706  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
707  %cmp = icmp slt i32 %i, %n
708  br i1 %cmp, label %for.body, label %if.end
709
710for.body:
711  %iprom = sext i32 %i to i64
712  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
713  store i16 0, i16* %b, align 4
714  %inc = add nsw i32 %i, 1
715  %cmp2 = icmp slt i32 %i, 2096 ; second exit test uses the pre-increment %i, not %inc
716  br i1 %cmp2, label %for.cond, label %if.end2
717
718if.end:
719  ret i32 %i
720
721if.end2:
722  ret i32 %inc ; the latch exit returns the incremented value, unlike if.end
723}
724
725; unique exit case but with a switch as two edges between the same pair of
726; blocks is an often missed edge case
; The loop is the single block for.cond.  Its switch on %i loops back by
; default and leaves to if.end for both 2096 and 2097, so there are two CFG
; edges from for.cond to if.end.  The expected output for both RUN lines keeps
; the scalar loop; the LCSSA phi in if.end has one incoming entry per edge.
727define i32 @multiple_exit_switch(i16* %p, i32 %n) {
728; CHECK-LABEL: @multiple_exit_switch(
729; CHECK-NEXT:  entry:
730; CHECK-NEXT:    br label [[FOR_COND:%.*]]
731; CHECK:       for.cond:
732; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
733; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
734; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
735; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
736; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
737; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
738; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
739; CHECK-NEXT:    i32 2097, label [[IF_END]]
740; CHECK-NEXT:    ]
741; CHECK:       if.end:
742; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
743; CHECK-NEXT:    ret i32 [[I_LCSSA]]
744;
745; TAILFOLD-LABEL: @multiple_exit_switch(
746; TAILFOLD-NEXT:  entry:
747; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
748; TAILFOLD:       for.cond:
749; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
750; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
751; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
752; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
753; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
754; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
755; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
756; TAILFOLD-NEXT:    i32 2097, label [[IF_END]]
757; TAILFOLD-NEXT:    ]
758; TAILFOLD:       if.end:
759; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
760; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
761;
762entry:
763  br label %for.cond
764
765for.cond:
766  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
767  %iprom = sext i32 %i to i64
768  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
769  store i16 0, i16* %b, align 4
770  %inc = add nsw i32 %i, 1
771  switch i32 %i, label %for.cond [
772  i32 2096, label %if.end
773  i32 2097, label %if.end
774  ]
775
776if.end:
777  ret i32 %i
778}
779
780; multiple exit case but with a switch as multiple exiting edges from
781; a single block is a commonly missed edge case
; The loop is the single block for.cond.  Its switch on %i loops back by
; default, leaves to if.end for 2096 and to if.end2 for 2097, so one exiting
; block feeds two different exit blocks.  The exit blocks return the constants
; 0 and 1, so no in-loop value is live out.  The expected output for both RUN
; lines is the unmodified scalar loop.
782define i32 @multiple_exit_switch2(i16* %p, i32 %n) {
783; CHECK-LABEL: @multiple_exit_switch2(
784; CHECK-NEXT:  entry:
785; CHECK-NEXT:    br label [[FOR_COND:%.*]]
786; CHECK:       for.cond:
787; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
788; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
789; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
790; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
791; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
792; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
793; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
794; CHECK-NEXT:    i32 2097, label [[IF_END2:%.*]]
795; CHECK-NEXT:    ]
796; CHECK:       if.end:
797; CHECK-NEXT:    ret i32 0
798; CHECK:       if.end2:
799; CHECK-NEXT:    ret i32 1
800;
801; TAILFOLD-LABEL: @multiple_exit_switch2(
802; TAILFOLD-NEXT:  entry:
803; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
804; TAILFOLD:       for.cond:
805; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
806; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
807; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
808; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
809; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
810; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
811; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
812; TAILFOLD-NEXT:    i32 2097, label [[IF_END2:%.*]]
813; TAILFOLD-NEXT:    ]
814; TAILFOLD:       if.end:
815; TAILFOLD-NEXT:    ret i32 0
816; TAILFOLD:       if.end2:
817; TAILFOLD-NEXT:    ret i32 1
818;
819entry:
820  br label %for.cond
821
822for.cond:
823  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
824  %iprom = sext i32 %i to i64
825  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
826  store i16 0, i16* %b, align 4
827  %inc = add nsw i32 %i, 1
828  switch i32 %i, label %for.cond [
829  i32 2096, label %if.end
830  i32 2097, label %if.end2
831  ]
832
833if.end:
834  ret i32 0
835
836if.end2:
837  ret i32 1
838}
839
; Two conditional branches (in for.body and for.second) both continue the loop
; through the shared block for.body.backedge, which is the only block that
; branches back to the header for.body.  The store lives in for.second, which
; is reached only when %cmp is false.  The expected output for both RUN lines
; is the unmodified scalar loop.
840define i32 @multiple_latch1(i16* %p) {
841; CHECK-LABEL: @multiple_latch1(
842; CHECK-NEXT:  entry:
843; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
844; CHECK:       for.body:
845; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
846; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
847; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
848; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
849; CHECK:       for.second:
850; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
851; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
852; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
853; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
854; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
855; CHECK:       for.body.backedge:
856; CHECK-NEXT:    br label [[FOR_BODY]]
857; CHECK:       for.end:
858; CHECK-NEXT:    ret i32 0
859;
860; TAILFOLD-LABEL: @multiple_latch1(
861; TAILFOLD-NEXT:  entry:
862; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
863; TAILFOLD:       for.body:
864; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
865; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
866; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
867; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
868; TAILFOLD:       for.second:
869; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
870; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
871; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
872; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
873; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
874; TAILFOLD:       for.body.backedge:
875; TAILFOLD-NEXT:    br label [[FOR_BODY]]
876; TAILFOLD:       for.end:
877; TAILFOLD-NEXT:    ret i32 0
878;
879entry:
880  br label %for.body
881
882for.body:
883  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge]
884  %inc = add nsw i32 %i.02, 1
885  %cmp = icmp slt i32 %inc, 16
886  br i1 %cmp, label %for.body.backedge, label %for.second
887
888for.second:
889  %iprom = sext i32 %i.02 to i64
890  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
891  store i16 0, i16* %b, align 4
892  %cmps = icmp sgt i32 %inc, 16
893  br i1 %cmps, label %for.body.backedge, label %for.end
894
895for.body.backedge:
896  br label %for.body
897
898for.end:
899  ret i32 0
900}
901
902
903; two back branches - loop simplify will convert this to the same form
904; as previous before vectorizer sees it, but show that.
; Here for.body and for.second each branch straight back to the header
; for.body, so the header phi has two backedge entries.  The expected output
; for both RUN lines shows a for.body.backedge block inserted as the single
; predecessor carrying %inc back to the header; the loop otherwise stays
; scalar.
905define i32 @multiple_latch2(i16* %p) {
906; CHECK-LABEL: @multiple_latch2(
907; CHECK-NEXT:  entry:
908; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
909; CHECK:       for.body:
910; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
911; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
912; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
913; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
914; CHECK:       for.body.backedge:
915; CHECK-NEXT:    br label [[FOR_BODY]]
916; CHECK:       for.second:
917; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
918; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
919; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
920; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
921; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
922; CHECK:       for.end:
923; CHECK-NEXT:    ret i32 0
924;
925; TAILFOLD-LABEL: @multiple_latch2(
926; TAILFOLD-NEXT:  entry:
927; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
928; TAILFOLD:       for.body:
929; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
930; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
931; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
932; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
933; TAILFOLD:       for.body.backedge:
934; TAILFOLD-NEXT:    br label [[FOR_BODY]]
935; TAILFOLD:       for.second:
936; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
937; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
938; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
939; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
940; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
941; TAILFOLD:       for.end:
942; TAILFOLD-NEXT:    ret i32 0
943;
944entry:
945  br label %for.body
946
947for.body:
948  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
949  %inc = add nsw i32 %i.02, 1
950  %cmp = icmp slt i32 %inc, 16
951  br i1 %cmp, label %for.body, label %for.second
952
953for.second:
954  %iprom = sext i32 %i.02 to i64
955  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
956  store i16 0, i16* %b, align 4
957  %cmps = icmp sgt i32 %inc, 16
958  br i1 %cmps, label %for.body, label %for.end
959
960for.end:
961  ret i32 0
962}
963
964
965; Check interaction between block predication and early exits.  We need the
966; condition on the early exit to remain dead (i.e. not be used when forming
967; the predicate mask).
; The only exit is the header compare %iv == 200; the latch branches back
; unconditionally.  The store in 'then' is guarded by the fcmp in loop.body.
; In the default run the expected vector body (VF 2) builds the per-lane store
; mask solely from the negated fcmp result, and the original loop is kept as
; the scalar remainder reached through scalar.ph.  In the
; predicate-dont-vectorize run the expected output is the unmodified scalar
; loop.
968define void @scalar_predication(float* %addr) {
969; CHECK-LABEL: @scalar_predication(
970; CHECK-NEXT:  entry:
971; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
972; CHECK:       vector.ph:
973; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
974; CHECK:       vector.body:
975; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
976; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
977; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
978; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]]
979; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
980; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
981; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
982; CHECK-NEXT:    [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
983; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
984; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
985; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
986; CHECK:       pred.store.if:
987; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP1]], align 4
988; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
989; CHECK:       pred.store.continue:
990; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
991; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
992; CHECK:       pred.store.if1:
993; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
994; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP8]]
995; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP9]], align 4
996; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
997; CHECK:       pred.store.continue2:
998; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
999; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1000; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
1001; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]]
1002; CHECK:       middle.block:
1003; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 201, 200
1004; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1005; CHECK:       scalar.ph:
1006; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1007; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1008; CHECK:       loop.header:
1009; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1010; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]]
1011; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1012; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_BODY:%.*]]
1013; CHECK:       loop.body:
1014; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[GEP]], align 4
1015; CHECK-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00
1016; CHECK-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1017; CHECK:       then:
1018; CHECK-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1019; CHECK-NEXT:    br label [[LOOP_LATCH]]
1020; CHECK:       loop.latch:
1021; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1022; CHECK-NEXT:    br label [[LOOP_HEADER]], [[LOOP13:!llvm.loop !.*]]
1023; CHECK:       exit:
1024; CHECK-NEXT:    ret void
1025;
1026; TAILFOLD-LABEL: @scalar_predication(
1027; TAILFOLD-NEXT:  entry:
1028; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1029; TAILFOLD:       loop.header:
1030; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1031; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]]
1032; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1033; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
1034; TAILFOLD:       loop.body:
1035; TAILFOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[GEP]], align 4
1036; TAILFOLD-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
1037; TAILFOLD-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1038; TAILFOLD:       then:
1039; TAILFOLD-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1040; TAILFOLD-NEXT:    br label [[LOOP_LATCH]]
1041; TAILFOLD:       loop.latch:
1042; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1043; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1044; TAILFOLD:       exit:
1045; TAILFOLD-NEXT:    ret void
1046;
1047entry:
1048  br label %loop.header
1049
1050loop.header:
1051  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1052  %gep = getelementptr float, float* %addr, i64 %iv
1053  %exitcond.not = icmp eq i64 %iv, 200 ; early-exit condition; must not feed the store mask
1054  br i1 %exitcond.not, label %exit, label %loop.body
1055
1056loop.body:
1057  %0 = load float, float* %gep, align 4
1058  %pred = fcmp oeq float %0, 0.0 ; the store below runs only when this is false
1059  br i1 %pred, label %loop.latch, label %then
1060
1061then:
1062  store float 10.0, float* %gep, align 4
1063  br label %loop.latch
1064
1065loop.latch:
1066  %iv.next = add nuw nsw i64 %iv, 1
1067  br label %loop.header
1068
1069exit:
1070  ret void
1071}
1072
; Integer add reduction in a loop with two exiting blocks (header and latch)
; that share the single exit block 'exit'.  %lcssa is 0 when leaving from the
; header and %accum.next when leaving from the latch.  In the default run the
; expected output is vectorized with VF 2: a <2 x i32> accumulator phi is
; reduced to a scalar in middle.block, and the exit phi gains a third incoming
; value from middle.block.  In the predicate-dont-vectorize run the expected
; output is the unmodified scalar loop.
1073define i32 @me_reduction(i32* %addr) {
1074; CHECK-LABEL: @me_reduction(
1075; CHECK-NEXT:  entry:
1076; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1077; CHECK:       vector.ph:
1078; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1079; CHECK:       vector.body:
1080; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1081; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1082; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
1083; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1084; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
1085; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]]
1086; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0
1087; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
1088; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
1089; CHECK-NEXT:    [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
1090; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
1091; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1092; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
1093; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]]
1094; CHECK:       middle.block:
1095; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> <i32 1, i32 undef>
1096; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <2 x i32> [[TMP5]], [[RDX_SHUF]]
1097; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[BIN_RDX]], i32 0
1098; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 201, 200
1099; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
1100; CHECK:       scalar.ph:
1101; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1102; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1103; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1104; CHECK:       loop.header:
1105; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1106; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1107; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]]
1108; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1109; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT]], label [[LOOP_LATCH]]
1110; CHECK:       loop.latch:
1111; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4
1112; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]]
1113; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1114; CHECK-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1115; CHECK-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], [[LOOP15:!llvm.loop !.*]]
1116; CHECK:       exit:
1117; CHECK-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1118; CHECK-NEXT:    ret i32 [[LCSSA]]
1119;
1120; TAILFOLD-LABEL: @me_reduction(
1121; TAILFOLD-NEXT:  entry:
1122; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1123; TAILFOLD:       loop.header:
1124; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1125; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1126; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1127; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1128; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1129; TAILFOLD:       loop.latch:
1130; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1131; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1132; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1133; TAILFOLD-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1134; TAILFOLD-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]]
1135; TAILFOLD:       exit:
1136; TAILFOLD-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1137; TAILFOLD-NEXT:    ret i32 [[LCSSA]]
1138;
1139entry:
1140  br label %loop.header
1141
1142loop.header:
1143  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1144  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1145  %gep = getelementptr i32, i32* %addr, i64 %iv
1146  %exitcond.not = icmp eq i64 %iv, 200
1147  br i1 %exitcond.not, label %exit, label %loop.latch
1148
1149loop.latch:
1150  %0 = load i32, i32* %gep, align 4
1151  %accum.next = add i32 %accum, %0
1152  %iv.next = add nuw nsw i64 %iv, 1
1153  %exitcond2.not = icmp eq i64 %iv, 400 ; %iv counts up from 0, so the header exit at 200 is hit first
1154  br i1 %exitcond2.not, label %exit, label %loop.header
1155
1156exit:
1157  %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch]
1158  ret i32 %lcssa
1159}
1160
1161; TODO: The current definition of reduction is too strict, we can vectorize
1162; this.  There's an analogous single exit case where we extract the N-1
1163; value of the reduction that we can also handle.  If we fix the latter, the
1164; multiple exit case probably falls out.
; The loop exits only from the header (%iv == 200) and the latch branches back
; unconditionally.  The value returned is the header phi %accum, not
; %accum.next.  The expected output for both RUN lines is the scalar loop with
; an LCSSA phi of %accum in the exit block.
1165define i32 @me_reduction2(i32* %addr) {
1166; CHECK-LABEL: @me_reduction2(
1167; CHECK-NEXT:  entry:
1168; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1169; CHECK:       loop.header:
1170; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1171; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1172; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1173; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1174; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1175; CHECK:       loop.latch:
1176; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1177; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1178; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1179; CHECK-NEXT:    br label [[LOOP_HEADER]]
1180; CHECK:       exit:
1181; CHECK-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1182; CHECK-NEXT:    ret i32 [[ACCUM_LCSSA]]
1183;
1184; TAILFOLD-LABEL: @me_reduction2(
1185; TAILFOLD-NEXT:  entry:
1186; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1187; TAILFOLD:       loop.header:
1188; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1189; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1190; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1191; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1192; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1193; TAILFOLD:       loop.latch:
1194; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1195; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1196; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1197; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1198; TAILFOLD:       exit:
1199; TAILFOLD-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1200; TAILFOLD-NEXT:    ret i32 [[ACCUM_LCSSA]]
1201;
1202entry:
1203  br label %loop.header
1204
1205loop.header:
1206  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1207  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1208  %gep = getelementptr i32, i32* %addr, i64 %iv
1209  %exitcond.not = icmp eq i64 %iv, 200
1210  br i1 %exitcond.not, label %exit, label %loop.latch
1211
1212loop.latch:
1213  %0 = load i32, i32* %gep, align 4
1214  %accum.next = add i32 %accum, %0
1215  %iv.next = add nuw nsw i64 %iv, 1
1216  br label %loop.header
1217
1218exit:
1219  ret i32 %accum ; header phi value, i.e. the sum before the current iteration's add
1220}
1221
1222