1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -loop-vectorize -force-vector-width=2 < %s | FileCheck %s
3; RUN: opt -S -loop-vectorize -force-vector-width=2  -prefer-predicate-over-epilogue=predicate-dont-vectorize < %s | FileCheck --check-prefix TAILFOLD %s
4
5target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
6
; Bottom-tested loop: for.cond is the only loop block, and its exit test
; (%i < %n) is evaluated after the store, so the body runs at least once.
; The first run line expects a width-2 vector loop followed by a scalar
; remainder loop; the run with predicate-dont-vectorize expects the tail to be
; folded into the vector loop using per-lane predicated stores.
7define void @bottom_tested(i16* %p, i32 %n) {
8; CHECK-LABEL: @bottom_tested(
9; CHECK-NEXT:  entry:
10; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
11; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
12; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1
13; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 2
14; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
15; CHECK:       vector.ph:
16; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
17; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[N_MOD_VF]]
18; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
19; CHECK:       vector.body:
20; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
21; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
22; CHECK-NEXT:    [[TMP3:%.*]] = sext i32 [[TMP2]] to i64
23; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP3]]
24; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0
25; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i16* [[TMP5]] to <2 x i16>*
26; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP6]], align 4
27; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
28; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
29; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
30; CHECK:       middle.block:
31; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP1]], [[N_VEC]]
32; CHECK-NEXT:    br i1 [[CMP_N]], label [[IF_END:%.*]], label [[SCALAR_PH]]
33; CHECK:       scalar.ph:
34; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
35; CHECK-NEXT:    br label [[FOR_COND:%.*]]
36; CHECK:       for.cond:
37; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
38; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
39; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
40; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
41; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
42; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
43; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]]
44; CHECK:       if.end:
45; CHECK-NEXT:    ret void
46;
47; TAILFOLD-LABEL: @bottom_tested(
48; TAILFOLD-NEXT:  entry:
49; TAILFOLD-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
50; TAILFOLD-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
51; TAILFOLD-NEXT:    [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1
52; TAILFOLD-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
53; TAILFOLD:       vector.ph:
54; TAILFOLD-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
55; TAILFOLD-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
56; TAILFOLD-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
57; TAILFOLD-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
58; TAILFOLD-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
59; TAILFOLD-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
60; TAILFOLD-NEXT:    br label [[VECTOR_BODY:%.*]]
61; TAILFOLD:       vector.body:
62; TAILFOLD-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
63; TAILFOLD-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
64; TAILFOLD-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 0
65; TAILFOLD-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 1
66; TAILFOLD-NEXT:    [[TMP4:%.*]] = icmp ule <2 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
67; TAILFOLD-NEXT:    [[TMP5:%.*]] = sext <2 x i32> [[VEC_IND]] to <2 x i64>
68; TAILFOLD-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
69; TAILFOLD-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
70; TAILFOLD:       pred.store.if:
71; TAILFOLD-NEXT:    [[TMP7:%.*]] = extractelement <2 x i64> [[TMP5]], i32 0
72; TAILFOLD-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
73; TAILFOLD-NEXT:    store i16 0, i16* [[TMP8]], align 4
74; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE]]
75; TAILFOLD:       pred.store.continue:
76; TAILFOLD-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
77; TAILFOLD-NEXT:    br i1 [[TMP9]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
78; TAILFOLD:       pred.store.if1:
79; TAILFOLD-NEXT:    [[TMP10:%.*]] = extractelement <2 x i64> [[TMP5]], i32 1
80; TAILFOLD-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[TMP10]]
81; TAILFOLD-NEXT:    store i16 0, i16* [[TMP11]], align 4
82; TAILFOLD-NEXT:    br label [[PRED_STORE_CONTINUE2]]
83; TAILFOLD:       pred.store.continue2:
84; TAILFOLD-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
85; TAILFOLD-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
86; TAILFOLD-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
87; TAILFOLD-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
88; TAILFOLD:       middle.block:
89; TAILFOLD-NEXT:    br i1 true, label [[IF_END:%.*]], label [[SCALAR_PH]]
90; TAILFOLD:       scalar.ph:
91; TAILFOLD-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
92; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
93; TAILFOLD:       for.cond:
94; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_COND]] ]
95; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
96; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
97; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
98; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
99; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
100; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_COND]], label [[IF_END]], [[LOOP2:!llvm.loop !.*]]
101; TAILFOLD:       if.end:
102; TAILFOLD-NEXT:    ret void
103;
104entry:
105  br label %for.cond
106
  ; for.cond is both the loop header and the latch: it branches back to itself.
107for.cond:
108  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
109  %iprom = sext i32 %i to i64
110  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
111  store i16 0, i16* %b, align 4
112  %inc = add nsw i32 %i, 1
  ; The exit test compares the pre-increment %i, not %inc.
113  %cmp = icmp slt i32 %i, %n
114  br i1 %cmp, label %for.cond, label %if.end
115
116if.end:
117  ret void
118}
119
; Top-tested loop: the only exit test (%i < %n) sits in the header for.cond,
; while the latch for.body branches back unconditionally.
; The first run line expects a width-2 vector loop whose middle block always
; continues into the scalar loop (the vector trip count is reduced so that at
; least one iteration is left over).  The run with predicate-dont-vectorize
; expects the loop to be left unvectorized.
120define void @early_exit(i16* %p, i32 %n) {
121; CHECK-LABEL: @early_exit(
122; CHECK-NEXT:  entry:
123; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
124; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
125; CHECK-NEXT:    [[TMP1:%.*]] = add nuw i32 [[SMAX]], 1
126; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP1]], 2
127; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
128; CHECK:       vector.ph:
129; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP1]], 2
130; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
131; CHECK-NEXT:    [[TMP3:%.*]] = select i1 [[TMP2]], i32 2, i32 [[N_MOD_VF]]
132; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP1]], [[TMP3]]
133; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
134; CHECK:       vector.body:
135; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
136; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
137; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[INDEX]], 0
138; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 1
139; CHECK-NEXT:    [[TMP6:%.*]] = sext i32 [[TMP4]] to i64
140; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP6]]
141; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP7]], i32 0
142; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <2 x i16>*
143; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP9]], align 4
144; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
145; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
146; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
147; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP4:!llvm.loop !.*]]
148; CHECK:       middle.block:
149; CHECK-NEXT:    br label [[SCALAR_PH]]
150; CHECK:       scalar.ph:
151; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
152; CHECK-NEXT:    br label [[FOR_COND:%.*]]
153; CHECK:       for.cond:
154; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
155; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
156; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
157; CHECK:       for.body:
158; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
159; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
160; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
161; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
162; CHECK-NEXT:    br label [[FOR_COND]], [[LOOP5:!llvm.loop !.*]]
163; CHECK:       if.end:
164; CHECK-NEXT:    ret void
165;
166; TAILFOLD-LABEL: @early_exit(
167; TAILFOLD-NEXT:  entry:
168; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
169; TAILFOLD:       for.cond:
170; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
171; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
172; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
173; TAILFOLD:       for.body:
174; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
175; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
176; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
177; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
178; TAILFOLD-NEXT:    br label [[FOR_COND]]
179; TAILFOLD:       if.end:
180; TAILFOLD-NEXT:    ret void
181;
182entry:
183  br label %for.cond
184
  ; Header: the exit test runs before the store, so the body may run zero times.
185for.cond:
186  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
187  %cmp = icmp slt i32 %i, %n
188  br i1 %cmp, label %for.body, label %if.end
189
  ; Latch: performs the store and increment, then branches back unconditionally.
190for.body:
191  %iprom = sext i32 %i to i64
192  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
193  store i16 0, i16* %b, align 4
194  %inc = add nsw i32 %i, 1
195  br label %for.cond
196
197if.end:
198  ret void
199}
200
201; Same as early_exit, but with optsize to prevent the use of
202; a scalar epilogue.  -- Can't vectorize this in either case.
; The expectations for both run lines therefore show only the original scalar
; loop (for.cond / for.body), with no vector blocks.
203define void @optsize(i16* %p, i32 %n) optsize {
204; CHECK-LABEL: @optsize(
205; CHECK-NEXT:  entry:
206; CHECK-NEXT:    br label [[FOR_COND:%.*]]
207; CHECK:       for.cond:
208; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
209; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
210; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
211; CHECK:       for.body:
212; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
213; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
214; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
215; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
216; CHECK-NEXT:    br label [[FOR_COND]]
217; CHECK:       if.end:
218; CHECK-NEXT:    ret void
219;
220; TAILFOLD-LABEL: @optsize(
221; TAILFOLD-NEXT:  entry:
222; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
223; TAILFOLD:       for.cond:
224; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
225; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
226; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
227; TAILFOLD:       for.body:
228; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
229; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
230; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
231; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
232; TAILFOLD-NEXT:    br label [[FOR_COND]]
233; TAILFOLD:       if.end:
234; TAILFOLD-NEXT:    ret void
235;
236entry:
237  br label %for.cond
238
  ; Header: holds the only exit test (%i < %n).
239for.cond:
240  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
241  %cmp = icmp slt i32 %i, %n
242  br i1 %cmp, label %for.body, label %if.end
243
  ; Latch: store, increment, and an unconditional branch back to the header.
244for.body:
245  %iprom = sext i32 %i to i64
246  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
247  store i16 0, i16* %b, align 4
248  %inc = add nsw i32 %i, 1
249  br label %for.cond
250
251if.end:
252  ret void
253}
254
255
256; multiple exit - no values inside the loop used outside
; The loop has two exiting blocks, for.cond (%i < %n) and for.body
; (%i < 2096), and both branch to the same exit block if.end.
; The first run line expects a width-2 vector loop whose trip count is built
; from umin(smax(%n, 0), 2096) + 1 and whose middle block always continues
; into the scalar loop.  The run with predicate-dont-vectorize expects the
; loop to be left unvectorized.
257define void @multiple_unique_exit(i16* %p, i32 %n) {
258; CHECK-LABEL: @multiple_unique_exit(
259; CHECK-NEXT:  entry:
260; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
261; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
262; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096
263; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096
264; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1
265; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2
266; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
267; CHECK:       vector.ph:
268; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
269; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
270; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]]
271; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]]
272; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
273; CHECK:       vector.body:
274; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
275; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
276; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 0
277; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
278; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP5]] to i64
279; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
280; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
281; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>*
282; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4
283; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
284; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
285; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
286; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP6:!llvm.loop !.*]]
287; CHECK:       middle.block:
288; CHECK-NEXT:    br label [[SCALAR_PH]]
289; CHECK:       scalar.ph:
290; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
291; CHECK-NEXT:    br label [[FOR_COND:%.*]]
292; CHECK:       for.cond:
293; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
294; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
295; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
296; CHECK:       for.body:
297; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
298; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
299; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
300; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
301; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
302; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP7:!llvm.loop !.*]]
303; CHECK:       if.end:
304; CHECK-NEXT:    ret void
305;
306; TAILFOLD-LABEL: @multiple_unique_exit(
307; TAILFOLD-NEXT:  entry:
308; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
309; TAILFOLD:       for.cond:
310; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
311; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
312; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
313; TAILFOLD:       for.body:
314; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
315; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
316; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
317; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
318; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
319; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
320; TAILFOLD:       if.end:
321; TAILFOLD-NEXT:    ret void
322;
323entry:
324  br label %for.cond
325
  ; First exit: leave the loop once %i is no longer less than %n.
326for.cond:
327  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
328  %cmp = icmp slt i32 %i, %n
329  br i1 %cmp, label %for.body, label %if.end
330
331for.body:
332  %iprom = sext i32 %i to i64
333  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
334  store i16 0, i16* %b, align 4
335  %inc = add nsw i32 %i, 1
  ; Second exit, taken from the latch: compares the pre-increment %i with the
  ; constant 2096 and targets the same exit block as the first exit.
336  %cmp2 = icmp slt i32 %i, 2096
337  br i1 %cmp2, label %for.cond, label %if.end
338
339if.end:
340  ret void
341}
342
343; multiple exit - with an lcssa phi
; Same two-exit loop shape as multiple_unique_exit (exits from for.cond and
; for.body, both to if.end), but the function returns %i, so the induction
; variable is live after the loop.  The expectations show this use as a phi
; in if.end that receives the scalar loop's %i from both exiting blocks.
; The first run line expects a width-2 vector loop that always continues into
; the scalar loop; the run with predicate-dont-vectorize expects no
; vectorization.
344define i32 @multiple_unique_exit2(i16* %p, i32 %n) {
345; CHECK-LABEL: @multiple_unique_exit2(
346; CHECK-NEXT:  entry:
347; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
348; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
349; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096
350; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096
351; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1
352; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2
353; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
354; CHECK:       vector.ph:
355; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
356; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
357; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]]
358; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]]
359; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
360; CHECK:       vector.body:
361; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
362; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
363; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 0
364; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
365; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP5]] to i64
366; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
367; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
368; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>*
369; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4
370; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
371; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
372; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
373; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP8:!llvm.loop !.*]]
374; CHECK:       middle.block:
375; CHECK-NEXT:    br label [[SCALAR_PH]]
376; CHECK:       scalar.ph:
377; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
378; CHECK-NEXT:    br label [[FOR_COND:%.*]]
379; CHECK:       for.cond:
380; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
381; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
382; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
383; CHECK:       for.body:
384; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
385; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
386; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
387; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
388; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
389; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP9:!llvm.loop !.*]]
390; CHECK:       if.end:
391; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
392; CHECK-NEXT:    ret i32 [[I_LCSSA]]
393;
394; TAILFOLD-LABEL: @multiple_unique_exit2(
395; TAILFOLD-NEXT:  entry:
396; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
397; TAILFOLD:       for.cond:
398; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
399; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
400; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
401; TAILFOLD:       for.body:
402; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
403; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
404; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
405; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
406; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
407; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
408; TAILFOLD:       if.end:
409; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ], [ [[I]], [[FOR_COND]] ]
410; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
411;
412entry:
413  br label %for.cond
414
415for.cond:
416  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
417  %cmp = icmp slt i32 %i, %n
418  br i1 %cmp, label %for.body, label %if.end
419
420for.body:
421  %iprom = sext i32 %i to i64
422  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
423  store i16 0, i16* %b, align 4
424  %inc = add nsw i32 %i, 1
425  %cmp2 = icmp slt i32 %i, 2096
426  br i1 %cmp2, label %for.cond, label %if.end
427
428if.end:
  ; %i is defined inside the loop and used here directly, outside of it.
429  ret i32 %i
430}
431
432; multiple exit w/a non lcssa phi
; Same two-exit loop shape as multiple_unique_exit, but the shared exit block
; if.end contains a phi of constants: 0 when the loop is left from for.cond
; and 1 when it is left from for.body.  The returned value therefore records
; which exit was taken rather than a value computed in the loop.
; The first run line expects a width-2 vector loop that always continues into
; the scalar loop; the run with predicate-dont-vectorize expects no
; vectorization.
433define i32 @multiple_unique_exit3(i16* %p, i32 %n) {
434; CHECK-LABEL: @multiple_unique_exit3(
435; CHECK-NEXT:  entry:
436; CHECK-NEXT:    [[TMP0:%.*]] = icmp sgt i32 [[N:%.*]], 0
437; CHECK-NEXT:    [[SMAX:%.*]] = select i1 [[TMP0]], i32 [[N]], i32 0
438; CHECK-NEXT:    [[TMP1:%.*]] = icmp ult i32 [[SMAX]], 2096
439; CHECK-NEXT:    [[UMIN:%.*]] = select i1 [[TMP1]], i32 [[SMAX]], i32 2096
440; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i32 [[UMIN]], 1
441; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ule i32 [[TMP2]], 2
442; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
443; CHECK:       vector.ph:
444; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP2]], 2
445; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[N_MOD_VF]], 0
446; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i32 2, i32 [[N_MOD_VF]]
447; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP2]], [[TMP4]]
448; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
449; CHECK:       vector.body:
450; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
451; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
452; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[INDEX]], 0
453; CHECK-NEXT:    [[TMP6:%.*]] = add i32 [[INDEX]], 1
454; CHECK-NEXT:    [[TMP7:%.*]] = sext i32 [[TMP5]] to i64
455; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[TMP7]]
456; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
457; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <2 x i16>*
458; CHECK-NEXT:    store <2 x i16> zeroinitializer, <2 x i16>* [[TMP10]], align 4
459; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
460; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
461; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
462; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP10:!llvm.loop !.*]]
463; CHECK:       middle.block:
464; CHECK-NEXT:    br label [[SCALAR_PH]]
465; CHECK:       scalar.ph:
466; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
467; CHECK-NEXT:    br label [[FOR_COND:%.*]]
468; CHECK:       for.cond:
469; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
470; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N]]
471; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
472; CHECK:       for.body:
473; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
474; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P]], i64 [[IPROM]]
475; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
476; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
477; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
478; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]], [[LOOP11:!llvm.loop !.*]]
479; CHECK:       if.end:
480; CHECK-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
481; CHECK-NEXT:    ret i32 [[EXIT]]
482;
483; TAILFOLD-LABEL: @multiple_unique_exit3(
484; TAILFOLD-NEXT:  entry:
485; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
486; TAILFOLD:       for.cond:
487; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
488; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
489; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
490; TAILFOLD:       for.body:
491; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
492; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
493; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
494; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
495; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
496; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END]]
497; TAILFOLD:       if.end:
498; TAILFOLD-NEXT:    [[EXIT:%.*]] = phi i32 [ 0, [[FOR_COND]] ], [ 1, [[FOR_BODY]] ]
499; TAILFOLD-NEXT:    ret i32 [[EXIT]]
500;
501entry:
502  br label %for.cond
503
504for.cond:
505  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
506  %cmp = icmp slt i32 %i, %n
507  br i1 %cmp, label %for.body, label %if.end
508
509for.body:
510  %iprom = sext i32 %i to i64
511  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
512  store i16 0, i16* %b, align 4
513  %inc = add nsw i32 %i, 1
514  %cmp2 = icmp slt i32 %i, 2096
515  br i1 %cmp2, label %for.cond, label %if.end
516
517if.end:
  ; The incoming values are constants that depend only on the exiting block.
518  %exit = phi i32 [0, %for.cond], [1, %for.body]
519  ret i32 %exit
520}
521
522; multiple exits w/distinct target blocks
; The exit from for.cond (%i < %n fails) goes to if.end, which returns 0; the
; exit from for.body (%i < 2096 fails) goes to if.end2, which returns 1.
; The expectations for both run lines show only the original scalar loop,
; i.e. no vectorization is expected.
523define i32 @multiple_exit_blocks(i16* %p, i32 %n) {
524; CHECK-LABEL: @multiple_exit_blocks(
525; CHECK-NEXT:  entry:
526; CHECK-NEXT:    br label [[FOR_COND:%.*]]
527; CHECK:       for.cond:
528; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
529; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
530; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
531; CHECK:       for.body:
532; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
533; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
534; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
535; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
536; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
537; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
538; CHECK:       if.end:
539; CHECK-NEXT:    ret i32 0
540; CHECK:       if.end2:
541; CHECK-NEXT:    ret i32 1
542;
543; TAILFOLD-LABEL: @multiple_exit_blocks(
544; TAILFOLD-NEXT:  entry:
545; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
546; TAILFOLD:       for.cond:
547; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
548; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
549; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
550; TAILFOLD:       for.body:
551; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
552; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
553; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
554; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
555; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
556; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
557; TAILFOLD:       if.end:
558; TAILFOLD-NEXT:    ret i32 0
559; TAILFOLD:       if.end2:
560; TAILFOLD-NEXT:    ret i32 1
561;
562entry:
563  br label %for.cond
564
565for.cond:
566  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
567  %cmp = icmp slt i32 %i, %n
568  br i1 %cmp, label %for.body, label %if.end
569
570for.body:
571  %iprom = sext i32 %i to i64
572  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
573  store i16 0, i16* %b, align 4
574  %inc = add nsw i32 %i, 1
575  %cmp2 = icmp slt i32 %i, 2096
  ; Unlike the header exit, this exit targets if.end2 rather than if.end.
576  br i1 %cmp2, label %for.cond, label %if.end2
577
578if.end:
579  ret i32 0
580
581if.end2:
582  ret i32 1
583}
584
585; LCSSA, common value each exit
; Two exits to distinct blocks (for.cond -> if.end, for.body -> if.end2), and
; both exit blocks return the same loop-defined value %i.  The expectations
; show that use as a separate single-input phi of %i in each exit block.
; The expectations for both run lines show only the original scalar loop,
; i.e. no vectorization is expected.
586define i32 @multiple_exit_blocks2(i16* %p, i32 %n) {
587; CHECK-LABEL: @multiple_exit_blocks2(
588; CHECK-NEXT:  entry:
589; CHECK-NEXT:    br label [[FOR_COND:%.*]]
590; CHECK:       for.cond:
591; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
592; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
593; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
594; CHECK:       for.body:
595; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
596; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
597; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
598; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
599; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
600; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
601; CHECK:       if.end:
602; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
603; CHECK-NEXT:    ret i32 [[I_LCSSA]]
604; CHECK:       if.end2:
605; CHECK-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
606; CHECK-NEXT:    ret i32 [[I_LCSSA1]]
607;
608; TAILFOLD-LABEL: @multiple_exit_blocks2(
609; TAILFOLD-NEXT:  entry:
610; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
611; TAILFOLD:       for.cond:
612; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
613; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
614; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
615; TAILFOLD:       for.body:
616; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
617; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
618; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
619; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
620; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
621; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
622; TAILFOLD:       if.end:
623; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
624; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
625; TAILFOLD:       if.end2:
626; TAILFOLD-NEXT:    [[I_LCSSA1:%.*]] = phi i32 [ [[I]], [[FOR_BODY]] ]
627; TAILFOLD-NEXT:    ret i32 [[I_LCSSA1]]
628;
629entry:
630  br label %for.cond
631
632for.cond:
633  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
634  %cmp = icmp slt i32 %i, %n
635  br i1 %cmp, label %for.body, label %if.end
636
637for.body:
638  %iprom = sext i32 %i to i64
639  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
640  store i16 0, i16* %b, align 4
641  %inc = add nsw i32 %i, 1
642  %cmp2 = icmp slt i32 %i, 2096
643  br i1 %cmp2, label %for.cond, label %if.end2
644
  ; Both exit blocks return the same value, %i (not %inc).
645if.end:
646  ret i32 %i
647
648if.end2:
649  ret i32 %i
650}
651
652; LCSSA, distinct value each exit
; The loop can leave from for.cond (to if.end, returning %i) or from for.body
; (to if.end2, returning %inc).  Both RUN lines expect the scalar loop to be
; kept as-is, with an LCSSA phi added in each exit block.
653define i32 @multiple_exit_blocks3(i16* %p, i32 %n) {
654; CHECK-LABEL: @multiple_exit_blocks3(
655; CHECK-NEXT:  entry:
656; CHECK-NEXT:    br label [[FOR_COND:%.*]]
657; CHECK:       for.cond:
658; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
659; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
660; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
661; CHECK:       for.body:
662; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
663; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
664; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
665; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
666; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
667; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
668; CHECK:       if.end:
669; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
670; CHECK-NEXT:    ret i32 [[I_LCSSA]]
671; CHECK:       if.end2:
672; CHECK-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
673; CHECK-NEXT:    ret i32 [[INC_LCSSA]]
674;
675; TAILFOLD-LABEL: @multiple_exit_blocks3(
676; TAILFOLD-NEXT:  entry:
677; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
678; TAILFOLD:       for.cond:
679; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
680; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I]], [[N:%.*]]
681; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[IF_END:%.*]]
682; TAILFOLD:       for.body:
683; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
684; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
685; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
686; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
687; TAILFOLD-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[I]], 2096
688; TAILFOLD-NEXT:    br i1 [[CMP2]], label [[FOR_COND]], label [[IF_END2:%.*]]
689; TAILFOLD:       if.end:
690; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ]
691; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
692; TAILFOLD:       if.end2:
693; TAILFOLD-NEXT:    [[INC_LCSSA:%.*]] = phi i32 [ [[INC]], [[FOR_BODY]] ]
694; TAILFOLD-NEXT:    ret i32 [[INC_LCSSA]]
695;
696entry:
697  br label %for.cond
698
699for.cond:
700  %i = phi i32 [ 0, %entry ], [ %inc, %for.body ]
701  %cmp = icmp slt i32 %i, %n
702  br i1 %cmp, label %for.body, label %if.end
703
704for.body:
705  %iprom = sext i32 %i to i64
706  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
707  store i16 0, i16* %b, align 4
708  %inc = add nsw i32 %i, 1
709  %cmp2 = icmp slt i32 %i, 2096
710  br i1 %cmp2, label %for.cond, label %if.end2
711
712if.end:
713  ret i32 %i
714
715if.end2:
716  ret i32 %inc
717}
718
719; unique exit case but with a switch as two edges between the same pair of
720; blocks is an often missed edge case
; Both switch cases (2096 and 2097) branch to if.end, so the LCSSA phi expected
; there carries two incoming entries from for.cond.  No vector body is expected
; for either RUN line.
721define i32 @multiple_exit_switch(i16* %p, i32 %n) {
722; CHECK-LABEL: @multiple_exit_switch(
723; CHECK-NEXT:  entry:
724; CHECK-NEXT:    br label [[FOR_COND:%.*]]
725; CHECK:       for.cond:
726; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
727; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
728; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
729; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
730; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
731; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
732; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
733; CHECK-NEXT:    i32 2097, label [[IF_END]]
734; CHECK-NEXT:    ]
735; CHECK:       if.end:
736; CHECK-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
737; CHECK-NEXT:    ret i32 [[I_LCSSA]]
738;
739; TAILFOLD-LABEL: @multiple_exit_switch(
740; TAILFOLD-NEXT:  entry:
741; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
742; TAILFOLD:       for.cond:
743; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
744; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
745; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
746; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
747; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
748; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
749; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
750; TAILFOLD-NEXT:    i32 2097, label [[IF_END]]
751; TAILFOLD-NEXT:    ]
752; TAILFOLD:       if.end:
753; TAILFOLD-NEXT:    [[I_LCSSA:%.*]] = phi i32 [ [[I]], [[FOR_COND]] ], [ [[I]], [[FOR_COND]] ]
754; TAILFOLD-NEXT:    ret i32 [[I_LCSSA]]
755;
756entry:
757  br label %for.cond
758
759for.cond:
760  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
761  %iprom = sext i32 %i to i64
762  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
763  store i16 0, i16* %b, align 4
764  %inc = add nsw i32 %i, 1
765  switch i32 %i, label %for.cond [
766  i32 2096, label %if.end
767  i32 2097, label %if.end
768  ]
769
770if.end:
771  ret i32 %i
772}
773
774; multiple exit case but with a switch as multiple exiting edges from
775; a single block is a commonly missed edge case
; Cases 2096 and 2097 leave for.cond to different blocks (if.end returns 0,
; if.end2 returns 1).  Both RUN lines expect the loop to be left unchanged.
776define i32 @multiple_exit_switch2(i16* %p, i32 %n) {
777; CHECK-LABEL: @multiple_exit_switch2(
778; CHECK-NEXT:  entry:
779; CHECK-NEXT:    br label [[FOR_COND:%.*]]
780; CHECK:       for.cond:
781; CHECK-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
782; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
783; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
784; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
785; CHECK-NEXT:    [[INC]] = add nsw i32 [[I]], 1
786; CHECK-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
787; CHECK-NEXT:    i32 2096, label [[IF_END:%.*]]
788; CHECK-NEXT:    i32 2097, label [[IF_END2:%.*]]
789; CHECK-NEXT:    ]
790; CHECK:       if.end:
791; CHECK-NEXT:    ret i32 0
792; CHECK:       if.end2:
793; CHECK-NEXT:    ret i32 1
794;
795; TAILFOLD-LABEL: @multiple_exit_switch2(
796; TAILFOLD-NEXT:  entry:
797; TAILFOLD-NEXT:    br label [[FOR_COND:%.*]]
798; TAILFOLD:       for.cond:
799; TAILFOLD-NEXT:    [[I:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND]] ]
800; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I]] to i64
801; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
802; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
803; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I]], 1
804; TAILFOLD-NEXT:    switch i32 [[I]], label [[FOR_COND]] [
805; TAILFOLD-NEXT:    i32 2096, label [[IF_END:%.*]]
806; TAILFOLD-NEXT:    i32 2097, label [[IF_END2:%.*]]
807; TAILFOLD-NEXT:    ]
808; TAILFOLD:       if.end:
809; TAILFOLD-NEXT:    ret i32 0
810; TAILFOLD:       if.end2:
811; TAILFOLD-NEXT:    ret i32 1
812;
813entry:
814  br label %for.cond
815
816for.cond:
817  %i = phi i32 [ 0, %entry ], [ %inc, %for.cond ]
818  %iprom = sext i32 %i to i64
819  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
820  store i16 0, i16* %b, align 4
821  %inc = add nsw i32 %i, 1
822  switch i32 %i, label %for.cond [
823  i32 2096, label %if.end
824  i32 2097, label %if.end2
825  ]
826
827if.end:
828  ret i32 0
829
830if.end2:
831  ret i32 1
832}
833
; Both for.body and for.second branch to the shared for.body.backedge block,
; which is the only block jumping back to the header.  Both RUN lines expect the
; loop to be left unchanged.
834define i32 @multiple_latch1(i16* %p) {
835; CHECK-LABEL: @multiple_latch1(
836; CHECK-NEXT:  entry:
837; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
838; CHECK:       for.body:
839; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
840; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
841; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
842; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
843; CHECK:       for.second:
844; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
845; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
846; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
847; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
848; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
849; CHECK:       for.body.backedge:
850; CHECK-NEXT:    br label [[FOR_BODY]]
851; CHECK:       for.end:
852; CHECK-NEXT:    ret i32 0
853;
854; TAILFOLD-LABEL: @multiple_latch1(
855; TAILFOLD-NEXT:  entry:
856; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
857; TAILFOLD:       for.body:
858; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
859; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
860; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
861; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
862; TAILFOLD:       for.second:
863; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
864; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
865; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
866; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
867; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
868; TAILFOLD:       for.body.backedge:
869; TAILFOLD-NEXT:    br label [[FOR_BODY]]
870; TAILFOLD:       for.end:
871; TAILFOLD-NEXT:    ret i32 0
872;
873entry:
874  br label %for.body
875
876for.body:
877  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body.backedge]
878  %inc = add nsw i32 %i.02, 1
879  %cmp = icmp slt i32 %inc, 16
880  br i1 %cmp, label %for.body.backedge, label %for.second
881
882for.second:
883  %iprom = sext i32 %i.02 to i64
884  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
885  store i16 0, i16* %b, align 4
886  %cmps = icmp sgt i32 %inc, 16
887  br i1 %cmps, label %for.body.backedge, label %for.end
888
889for.body.backedge:
890  br label %for.body
891
892for.end:
893  ret i32 0
894}
895
896
897; two back branches - loop simplify will convert this to the same form
898; as the previous test before the vectorizer sees it, but show that anyway.
; The input branches back to for.body from both for.body and for.second; the
; expected output routes both through a for.body.backedge block and contains no
; vector body for either RUN line.
899define i32 @multiple_latch2(i16* %p) {
900; CHECK-LABEL: @multiple_latch2(
901; CHECK-NEXT:  entry:
902; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
903; CHECK:       for.body:
904; CHECK-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
905; CHECK-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
906; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
907; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
908; CHECK:       for.body.backedge:
909; CHECK-NEXT:    br label [[FOR_BODY]]
910; CHECK:       for.second:
911; CHECK-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
912; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
913; CHECK-NEXT:    store i16 0, i16* [[B]], align 4
914; CHECK-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
915; CHECK-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
916; CHECK:       for.end:
917; CHECK-NEXT:    ret i32 0
918;
919; TAILFOLD-LABEL: @multiple_latch2(
920; TAILFOLD-NEXT:  entry:
921; TAILFOLD-NEXT:    br label [[FOR_BODY:%.*]]
922; TAILFOLD:       for.body:
923; TAILFOLD-NEXT:    [[I_02:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY_BACKEDGE:%.*]] ]
924; TAILFOLD-NEXT:    [[INC]] = add nsw i32 [[I_02]], 1
925; TAILFOLD-NEXT:    [[CMP:%.*]] = icmp slt i32 [[INC]], 16
926; TAILFOLD-NEXT:    br i1 [[CMP]], label [[FOR_BODY_BACKEDGE]], label [[FOR_SECOND:%.*]]
927; TAILFOLD:       for.body.backedge:
928; TAILFOLD-NEXT:    br label [[FOR_BODY]]
929; TAILFOLD:       for.second:
930; TAILFOLD-NEXT:    [[IPROM:%.*]] = sext i32 [[I_02]] to i64
931; TAILFOLD-NEXT:    [[B:%.*]] = getelementptr inbounds i16, i16* [[P:%.*]], i64 [[IPROM]]
932; TAILFOLD-NEXT:    store i16 0, i16* [[B]], align 4
933; TAILFOLD-NEXT:    [[CMPS:%.*]] = icmp sgt i32 [[INC]], 16
934; TAILFOLD-NEXT:    br i1 [[CMPS]], label [[FOR_BODY_BACKEDGE]], label [[FOR_END:%.*]]
935; TAILFOLD:       for.end:
936; TAILFOLD-NEXT:    ret i32 0
937;
938entry:
939  br label %for.body
940
941for.body:
942  %i.02 = phi i32 [ 0, %entry ], [ %inc, %for.body ], [%inc, %for.second]
943  %inc = add nsw i32 %i.02, 1
944  %cmp = icmp slt i32 %inc, 16
945  br i1 %cmp, label %for.body, label %for.second
946
947for.second:
948  %iprom = sext i32 %i.02 to i64
949  %b = getelementptr inbounds i16, i16* %p, i64 %iprom
950  store i16 0, i16* %b, align 4
951  %cmps = icmp sgt i32 %inc, 16
952  br i1 %cmps, label %for.body, label %for.end
953
954for.end:
955  ret i32 0
956}
957
958
959; Check interaction between block predication and early exits.  We need the
960; condition on the early exit to remain dead (i.e. not be used when forming
961; the predicate mask).
; The default run expects a VF=2 vector loop with per-lane predicated stores
; whose middle block always branches to the scalar loop; the tail-folding run
; expects the scalar loop to be left unchanged.
962define void @scalar_predication(float* %addr) {
963; CHECK-LABEL: @scalar_predication(
964; CHECK-NEXT:  entry:
965; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
966; CHECK:       vector.ph:
967; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
968; CHECK:       vector.body:
969; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE2:%.*]] ]
970; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE2]] ]
971; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
972; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[TMP0]]
973; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
974; CHECK-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <2 x float>*
975; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP3]], align 4
976; CHECK-NEXT:    [[TMP4:%.*]] = fcmp oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
977; CHECK-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP4]], <i1 true, i1 true>
978; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
979; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
980; CHECK:       pred.store.if:
981; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP1]], align 4
982; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
983; CHECK:       pred.store.continue:
984; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
985; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_STORE_IF1:%.*]], label [[PRED_STORE_CONTINUE2]]
986; CHECK:       pred.store.if1:
987; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 1
988; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, float* [[ADDR]], i64 [[TMP8]]
989; CHECK-NEXT:    store float 1.000000e+01, float* [[TMP9]], align 4
990; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE2]]
991; CHECK:       pred.store.continue2:
992; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
993; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
994; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
995; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP12:!llvm.loop !.*]]
996; CHECK:       middle.block:
997; CHECK-NEXT:    br label [[SCALAR_PH]]
998; CHECK:       scalar.ph:
999; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1000; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1001; CHECK:       loop.header:
1002; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1003; CHECK-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR]], i64 [[IV]]
1004; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1005; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
1006; CHECK:       loop.body:
1007; CHECK-NEXT:    [[TMP11:%.*]] = load float, float* [[GEP]], align 4
1008; CHECK-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP11]], 0.000000e+00
1009; CHECK-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1010; CHECK:       then:
1011; CHECK-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1012; CHECK-NEXT:    br label [[LOOP_LATCH]]
1013; CHECK:       loop.latch:
1014; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1015; CHECK-NEXT:    br label [[LOOP_HEADER]], [[LOOP13:!llvm.loop !.*]]
1016; CHECK:       exit:
1017; CHECK-NEXT:    ret void
1018;
1019; TAILFOLD-LABEL: @scalar_predication(
1020; TAILFOLD-NEXT:  entry:
1021; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1022; TAILFOLD:       loop.header:
1023; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1024; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr float, float* [[ADDR:%.*]], i64 [[IV]]
1025; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1026; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_BODY:%.*]]
1027; TAILFOLD:       loop.body:
1028; TAILFOLD-NEXT:    [[TMP0:%.*]] = load float, float* [[GEP]], align 4
1029; TAILFOLD-NEXT:    [[PRED:%.*]] = fcmp oeq float [[TMP0]], 0.000000e+00
1030; TAILFOLD-NEXT:    br i1 [[PRED]], label [[LOOP_LATCH]], label [[THEN:%.*]]
1031; TAILFOLD:       then:
1032; TAILFOLD-NEXT:    store float 1.000000e+01, float* [[GEP]], align 4
1033; TAILFOLD-NEXT:    br label [[LOOP_LATCH]]
1034; TAILFOLD:       loop.latch:
1035; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1036; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1037; TAILFOLD:       exit:
1038; TAILFOLD-NEXT:    ret void
1039;
1040entry:
1041  br label %loop.header
1042
1043loop.header:
1044  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1045  %gep = getelementptr float, float* %addr, i64 %iv
1046  %exitcond.not = icmp eq i64 %iv, 200
1047  br i1 %exitcond.not, label %exit, label %loop.body
1048
1049loop.body:
1050  %0 = load float, float* %gep, align 4
1051  %pred = fcmp oeq float %0, 0.0
1052  br i1 %pred, label %loop.latch, label %then
1053
1054then:
1055  store float 10.0, float* %gep, align 4
1056  br label %loop.latch
1057
1058loop.latch:
1059  %iv.next = add nuw nsw i64 %iv, 1
1060  br label %loop.header
1061
1062exit:
1063  ret void
1064}
1065
; Integer add reduction in a loop with two exiting blocks (loop.header when
; %iv == 200, loop.latch when %iv == 400) that share one exit block.  The
; default run expects a VF=2 vector loop plus a scalar loop that the middle
; block always branches to; the tail-folding run expects no vectorization.
1066define i32 @me_reduction(i32* %addr) {
1067; CHECK-LABEL: @me_reduction(
1068; CHECK-NEXT:  entry:
1069; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1070; CHECK:       vector.ph:
1071; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1072; CHECK:       vector.body:
1073; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1074; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1075; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
1076; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1077; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
1078; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[TMP0]]
1079; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP2]], i32 0
1080; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
1081; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP4]], align 4
1082; CHECK-NEXT:    [[TMP5]] = add <2 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
1083; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 2
1084; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1085; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 200
1086; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP14:!llvm.loop !.*]]
1087; CHECK:       middle.block:
1088; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <2 x i32> [[TMP5]], <2 x i32> poison, <2 x i32> <i32 1, i32 undef>
1089; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <2 x i32> [[TMP5]], [[RDX_SHUF]]
1090; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <2 x i32> [[BIN_RDX]], i32 0
1091; CHECK-NEXT:    br label [[SCALAR_PH]]
1092; CHECK:       scalar.ph:
1093; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 200, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1094; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1095; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1096; CHECK:       loop.header:
1097; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1098; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1099; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR]], i64 [[IV]]
1100; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1101; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1102; CHECK:       loop.latch:
1103; CHECK-NEXT:    [[TMP8:%.*]] = load i32, i32* [[GEP]], align 4
1104; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP8]]
1105; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1106; CHECK-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1107; CHECK-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]], [[LOOP15:!llvm.loop !.*]]
1108; CHECK:       exit:
1109; CHECK-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1110; CHECK-NEXT:    ret i32 [[LCSSA]]
1111;
1112; TAILFOLD-LABEL: @me_reduction(
1113; TAILFOLD-NEXT:  entry:
1114; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1115; TAILFOLD:       loop.header:
1116; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1117; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1118; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1119; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1120; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1121; TAILFOLD:       loop.latch:
1122; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1123; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1124; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1125; TAILFOLD-NEXT:    [[EXITCOND2_NOT:%.*]] = icmp eq i64 [[IV]], 400
1126; TAILFOLD-NEXT:    br i1 [[EXITCOND2_NOT]], label [[EXIT]], label [[LOOP_HEADER]]
1127; TAILFOLD:       exit:
1128; TAILFOLD-NEXT:    [[LCSSA:%.*]] = phi i32 [ 0, [[LOOP_HEADER]] ], [ [[ACCUM_NEXT]], [[LOOP_LATCH]] ]
1129; TAILFOLD-NEXT:    ret i32 [[LCSSA]]
1130;
1131entry:
1132  br label %loop.header
1133
1134loop.header:
1135  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1136  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1137  %gep = getelementptr i32, i32* %addr, i64 %iv
1138  %exitcond.not = icmp eq i64 %iv, 200
1139  br i1 %exitcond.not, label %exit, label %loop.latch
1140
1141loop.latch:
1142  %0 = load i32, i32* %gep, align 4
1143  %accum.next = add i32 %accum, %0
1144  %iv.next = add nuw nsw i64 %iv, 1
1145  %exitcond2.not = icmp eq i64 %iv, 400
1146  br i1 %exitcond2.not, label %exit, label %loop.header
1147
1148exit:
1149  %lcssa = phi i32 [0, %loop.header], [%accum.next, %loop.latch]
1150  ret i32 %lcssa
1151}
1152
1153; TODO: The current definition of reduction is too strict, we can vectorize
1154; this.  There's an analogous single exit case where we extract the N-1
1155; value of the reduction that we can also handle.  If we fix the latter, the
1156; multiple exit case probably falls out.
; Here the only exit is from loop.header (when %iv == 200), the latch branches
; back unconditionally, and the returned value is the %accum phi itself.  Both
; RUN lines currently expect the loop to be left unchanged.
1157define i32 @me_reduction2(i32* %addr) {
1158; CHECK-LABEL: @me_reduction2(
1159; CHECK-NEXT:  entry:
1160; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
1161; CHECK:       loop.header:
1162; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1163; CHECK-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1164; CHECK-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1165; CHECK-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1166; CHECK-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1167; CHECK:       loop.latch:
1168; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1169; CHECK-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1170; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1171; CHECK-NEXT:    br label [[LOOP_HEADER]]
1172; CHECK:       exit:
1173; CHECK-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1174; CHECK-NEXT:    ret i32 [[ACCUM_LCSSA]]
1175;
1176; TAILFOLD-LABEL: @me_reduction2(
1177; TAILFOLD-NEXT:  entry:
1178; TAILFOLD-NEXT:    br label [[LOOP_HEADER:%.*]]
1179; TAILFOLD:       loop.header:
1180; TAILFOLD-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[IV_NEXT:%.*]], [[LOOP_LATCH:%.*]] ]
1181; TAILFOLD-NEXT:    [[ACCUM:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[ACCUM_NEXT:%.*]], [[LOOP_LATCH]] ]
1182; TAILFOLD-NEXT:    [[GEP:%.*]] = getelementptr i32, i32* [[ADDR:%.*]], i64 [[IV]]
1183; TAILFOLD-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV]], 200
1184; TAILFOLD-NEXT:    br i1 [[EXITCOND_NOT]], label [[EXIT:%.*]], label [[LOOP_LATCH]]
1185; TAILFOLD:       loop.latch:
1186; TAILFOLD-NEXT:    [[TMP0:%.*]] = load i32, i32* [[GEP]], align 4
1187; TAILFOLD-NEXT:    [[ACCUM_NEXT]] = add i32 [[ACCUM]], [[TMP0]]
1188; TAILFOLD-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
1189; TAILFOLD-NEXT:    br label [[LOOP_HEADER]]
1190; TAILFOLD:       exit:
1191; TAILFOLD-NEXT:    [[ACCUM_LCSSA:%.*]] = phi i32 [ [[ACCUM]], [[LOOP_HEADER]] ]
1192; TAILFOLD-NEXT:    ret i32 [[ACCUM_LCSSA]]
1193;
1194entry:
1195  br label %loop.header
1196
1197loop.header:
1198  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
1199  %accum = phi i32 [0, %entry], [%accum.next, %loop.latch]
1200  %gep = getelementptr i32, i32* %addr, i64 %iv
1201  %exitcond.not = icmp eq i64 %iv, 200
1202  br i1 %exitcond.not, label %exit, label %loop.latch
1203
1204loop.latch:
1205  %0 = load i32, i32* %gep, align 4
1206  %accum.next = add i32 %accum, %0
1207  %iv.next = add nuw nsw i64 %iv, 1
1208  br label %loop.header
1209
1210exit:
1211  ret i32 %accum
1212}
1213
1214