1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -S | FileCheck %s
3; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=IND
4; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -instcombine -S | FileCheck %s --check-prefix=UNROLL
5; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
6; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -enable-interleaved-mem-accesses -instcombine -S | FileCheck %s --check-prefix=INTERLEAVE
7
8target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
9
10; Make sure that we can handle multiple integer induction variables.
11;
; Test input summary: a single-block loop that is always entered from
; for.body.lr.ph. Each iteration stores an i32 counter (initially 190) to
; A[i], where i is an i64 index starting at 0, and both advance by 1. The loop
; repeats until the i32 truncation of the next index equals N.
12define void @multi_int_induction(i32* %A, i32 %N) {
13; CHECK-LABEL: @multi_int_induction(
14; CHECK-NEXT:  for.body.lr.ph:
15; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
16; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
17; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
18; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
19; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
20; CHECK:       vector.ph:
21; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
22; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
23; CHECK-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
24; CHECK-NEXT:    [[IND_END:%.*]] = add i32 190, [[CAST_CRD]]
25; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
26; CHECK:       vector.body:
27; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
28; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
29; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
30; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
31; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
32; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>*
33; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP6]], align 4
34; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
35; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
36; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
37; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
38; CHECK:       middle.block:
39; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
40; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
41; CHECK:       scalar.ph:
42; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
43; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
44; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
45; CHECK:       for.body:
46; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
47; CHECK-NEXT:    [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
48; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
49; CHECK-NEXT:    store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
50; CHECK-NEXT:    [[INC]] = add nsw i32 [[COUNT_09]], 1
51; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
52; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
53; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[N]]
54; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]]
55; CHECK:       for.end:
56; CHECK-NEXT:    ret void
57;
58; IND-LABEL: @multi_int_induction(
59; IND-NEXT:  for.body.lr.ph:
60; IND-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
61; IND-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
62; IND-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
63; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
64; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
65; IND:       vector.ph:
66; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
67; IND-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
68; IND-NEXT:    [[IND_END:%.*]] = add i32 [[CAST_CRD]], 190
69; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
70; IND:       vector.body:
71; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
72; IND-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
73; IND-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
74; IND-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
75; IND-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP4]], align 4
76; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
77; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
78; IND-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
79; IND-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
80; IND:       middle.block:
81; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
82; IND-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
83; IND:       scalar.ph:
84; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
85; IND-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
86; IND-NEXT:    br label [[FOR_BODY:%.*]]
87; IND:       for.body:
88; IND-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
89; IND-NEXT:    [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
90; IND-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
91; IND-NEXT:    store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
92; IND-NEXT:    [[INC]] = add nsw i32 [[COUNT_09]], 1
93; IND-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
94; IND-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
95; IND-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
96; IND-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
97; IND:       for.end:
98; IND-NEXT:    ret void
99;
100; UNROLL-LABEL: @multi_int_induction(
101; UNROLL-NEXT:  for.body.lr.ph:
102; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
103; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
104; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
105; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
106; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
107; UNROLL:       vector.ph:
108; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
109; UNROLL-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
110; UNROLL-NEXT:    [[IND_END:%.*]] = add i32 [[CAST_CRD]], 190
111; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
112; UNROLL:       vector.body:
113; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
114; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
115; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
116; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
117; UNROLL-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
118; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP4]], align 4
119; UNROLL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 2
120; UNROLL-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>*
121; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP6]], align 4
122; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
123; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
124; UNROLL-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
125; UNROLL-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
126; UNROLL:       middle.block:
127; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
128; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
129; UNROLL:       scalar.ph:
130; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
131; UNROLL-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
132; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
133; UNROLL:       for.body:
134; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
135; UNROLL-NEXT:    [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
136; UNROLL-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
137; UNROLL-NEXT:    store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
138; UNROLL-NEXT:    [[INC]] = add nsw i32 [[COUNT_09]], 1
139; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
140; UNROLL-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
141; UNROLL-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
142; UNROLL-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
143; UNROLL:       for.end:
144; UNROLL-NEXT:    ret void
145;
146; UNROLL-NO-IC-LABEL: @multi_int_induction(
147; UNROLL-NO-IC-NEXT:  for.body.lr.ph:
148; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
149; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
150; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
151; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
152; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
153; UNROLL-NO-IC:       vector.ph:
154; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
155; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
156; UNROLL-NO-IC-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
157; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i32 190, [[CAST_CRD]]
158; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
159; UNROLL-NO-IC:       vector.body:
160; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
161; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 190, i32 191>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
162; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
163; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 2
164; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
165; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
166; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
167; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
168; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <2 x i32>*
169; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP8]], align 4
170; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 2
171; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <2 x i32>*
172; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP10]], align 4
173; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
174; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
175; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
176; UNROLL-NO-IC-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
177; UNROLL-NO-IC:       middle.block:
178; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
179; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
180; UNROLL-NO-IC:       scalar.ph:
181; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
182; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
183; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
184; UNROLL-NO-IC:       for.body:
185; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
186; UNROLL-NO-IC-NEXT:    [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
187; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
188; UNROLL-NO-IC-NEXT:    store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
189; UNROLL-NO-IC-NEXT:    [[INC]] = add nsw i32 [[COUNT_09]], 1
190; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
191; UNROLL-NO-IC-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
192; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp ne i32 [[LFTR_WIDEIV]], [[N]]
193; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP2:![0-9]+]]
194; UNROLL-NO-IC:       for.end:
195; UNROLL-NO-IC-NEXT:    ret void
196;
197; INTERLEAVE-LABEL: @multi_int_induction(
198; INTERLEAVE-NEXT:  for.body.lr.ph:
199; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
200; INTERLEAVE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
201; INTERLEAVE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
202; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
203; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
204; INTERLEAVE:       vector.ph:
205; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
206; INTERLEAVE-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
207; INTERLEAVE-NEXT:    [[IND_END:%.*]] = add i32 [[CAST_CRD]], 190
208; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
209; INTERLEAVE:       vector.body:
210; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
211; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 190, i32 191, i32 192, i32 193>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
212; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
213; INTERLEAVE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
214; INTERLEAVE-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
215; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP4]], align 4
216; INTERLEAVE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 4
217; INTERLEAVE-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
218; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP6]], align 4
219; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
220; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
221; INTERLEAVE-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
222; INTERLEAVE-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
223; INTERLEAVE:       middle.block:
224; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
225; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
226; INTERLEAVE:       scalar.ph:
227; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH:%.*]] ]
228; INTERLEAVE-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 190, [[FOR_BODY_LR_PH]] ]
229; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
230; INTERLEAVE:       for.body:
231; INTERLEAVE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
232; INTERLEAVE-NEXT:    [[COUNT_09:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
233; INTERLEAVE-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
234; INTERLEAVE-NEXT:    store i32 [[COUNT_09]], i32* [[ARRAYIDX2]], align 4
235; INTERLEAVE-NEXT:    [[INC]] = add nsw i32 [[COUNT_09]], 1
236; INTERLEAVE-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
237; INTERLEAVE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
238; INTERLEAVE-NEXT:    [[EXITCOND_NOT:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
239; INTERLEAVE-NEXT:    br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
240; INTERLEAVE:       for.end:
241; INTERLEAVE-NEXT:    ret void
242;
; Entry block: branches unconditionally into the loop, so the body runs at
; least once.
243for.body.lr.ph:
244  br label %for.body
245
; Loop body: two integer recurrences of different widths, both stepping by 1.
246for.body:
247  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ] ; i64 index, starts at 0
248  %count.09 = phi i32 [ 190, %for.body.lr.ph ], [ %inc, %for.body ] ; i32 counter, starts at 190
249  %arrayidx2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv ; address of A[index]
250  store i32 %count.09, i32* %arrayidx2, align 4 ; A[index] = counter
251  %inc = add nsw i32 %count.09, 1 ; counter + 1
252  %indvars.iv.next = add i64 %indvars.iv, 1 ; index + 1
253  %lftr.wideiv = trunc i64 %indvars.iv.next to i32 ; narrow the next index to the width of %N
254  %exitcond = icmp ne i32 %lftr.wideiv, %N ; true while the narrowed next index differs from %N
255  br i1 %exitcond, label %for.body, label %for.end ; keep looping while %exitcond is true
256
257for.end:
258  ret void
259}
260
261; Make sure we remove unneeded vectorization of induction variables.
262; In order for instcombine to clean up the vectorized induction variables that we
263; create in the loop vectorizer, we need to perform some form of redundancy
264; elimination to get rid of multiple uses.
265
266
267;   Vectorized induction variable.
268
269define void @scalar_use(float* %a, float %b, i64 %offset, i64 %offset2, i64 %n) {
270; CHECK-LABEL: @scalar_use(
271; CHECK-NEXT:  entry:
272; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
273; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
274; CHECK:       vector.memcheck:
275; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]]
276; CHECK-NEXT:    [[SCEVGEP1:%.*]] = bitcast float* [[SCEVGEP]] to i8*
277; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]]
278; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]]
279; CHECK-NEXT:    [[SCEVGEP23:%.*]] = bitcast float* [[SCEVGEP2]] to i8*
280; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]]
281; CHECK-NEXT:    [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8*
282; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]]
283; CHECK-NEXT:    [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]]
284; CHECK-NEXT:    [[SCEVGEP67:%.*]] = bitcast float* [[SCEVGEP6]] to i8*
285; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]]
286; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]]
287; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
288; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
289; CHECK:       vector.ph:
290; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
291; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
292; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i32 0
293; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
294; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
295; CHECK:       vector.body:
296; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
297; CHECK-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
298; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[TMP2]], [[OFFSET]]
299; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP3]]
300; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP4]], i32 0
301; CHECK-NEXT:    [[TMP6:%.*]] = bitcast float* [[TMP5]] to <2 x float>*
302; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP6]], align 4, !alias.scope !4, !noalias !7
303; CHECK-NEXT:    [[TMP7:%.*]] = add i64 [[TMP2]], [[OFFSET2]]
304; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP7]]
305; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[TMP8]], i32 0
306; CHECK-NEXT:    [[TMP10:%.*]] = bitcast float* [[TMP9]] to <2 x float>*
307; CHECK-NEXT:    [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP10]], align 4, !alias.scope !7
308; CHECK-NEXT:    [[TMP11:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD8]]
309; CHECK-NEXT:    [[TMP12:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP11]]
310; CHECK-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP5]] to <2 x float>*
311; CHECK-NEXT:    store <2 x float> [[TMP12]], <2 x float>* [[TMP13]], align 4, !alias.scope !4, !noalias !7
312; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
313; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
314; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
315; CHECK:       middle.block:
316; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
317; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
318; CHECK:       scalar.ph:
319; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
320; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
321; CHECK:       for.body:
322; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
323; CHECK-NEXT:    [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]]
324; CHECK-NEXT:    [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]]
325; CHECK-NEXT:    [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4
326; CHECK-NEXT:    [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]]
327; CHECK-NEXT:    [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]]
328; CHECK-NEXT:    [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4
329; CHECK-NEXT:    [[M:%.*]] = fmul fast float [[B]], [[L2]]
330; CHECK-NEXT:    [[AD:%.*]] = fadd fast float [[L1]], [[M]]
331; CHECK-NEXT:    store float [[AD]], float* [[ARR_IDX]], align 4
332; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
333; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
334; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
335; CHECK:       loopexit:
336; CHECK-NEXT:    ret void
337;
338; IND-LABEL: @scalar_use(
339; IND-NEXT:  entry:
340; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
341; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
342; IND:       vector.memcheck:
343; IND-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]]
344; IND-NEXT:    [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]]
345; IND-NEXT:    [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]]
346; IND-NEXT:    [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]]
347; IND-NEXT:    [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]]
348; IND-NEXT:    [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]]
349; IND-NEXT:    [[BOUND0:%.*]] = icmp ult float* [[SCEVGEP]], [[SCEVGEP6]]
350; IND-NEXT:    [[BOUND1:%.*]] = icmp ult float* [[SCEVGEP4]], [[SCEVGEP2]]
351; IND-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
352; IND-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
353; IND:       vector.ph:
354; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -2
355; IND-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i64 0
356; IND-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
357; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
358; IND:       vector.body:
359; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
360; IND-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], [[OFFSET]]
361; IND-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]]
362; IND-NEXT:    [[TMP4:%.*]] = bitcast float* [[TMP3]] to <2 x float>*
363; IND-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 4, !alias.scope !4, !noalias !7
364; IND-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], [[OFFSET2]]
365; IND-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]]
366; IND-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>*
367; IND-NEXT:    [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP7]], align 4, !alias.scope !7
368; IND-NEXT:    [[TMP8:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD8]]
369; IND-NEXT:    [[TMP9:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP8]]
370; IND-NEXT:    [[TMP10:%.*]] = bitcast float* [[TMP3]] to <2 x float>*
371; IND-NEXT:    store <2 x float> [[TMP9]], <2 x float>* [[TMP10]], align 4, !alias.scope !4, !noalias !7
372; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
373; IND-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
374; IND-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
375; IND:       middle.block:
376; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
377; IND-NEXT:    br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
378; IND:       scalar.ph:
379; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
380; IND-NEXT:    br label [[FOR_BODY:%.*]]
381; IND:       for.body:
382; IND-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
383; IND-NEXT:    [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]]
384; IND-NEXT:    [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]]
385; IND-NEXT:    [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4
386; IND-NEXT:    [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]]
387; IND-NEXT:    [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]]
388; IND-NEXT:    [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4
389; IND-NEXT:    [[M:%.*]] = fmul fast float [[L2]], [[B]]
390; IND-NEXT:    [[AD:%.*]] = fadd fast float [[L1]], [[M]]
391; IND-NEXT:    store float [[AD]], float* [[ARR_IDX]], align 4
392; IND-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
393; IND-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
394; IND-NEXT:    br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
395; IND:       loopexit:
396; IND-NEXT:    ret void
397;
398; UNROLL-LABEL: @scalar_use(
399; UNROLL-NEXT:  entry:
400; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
401; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
402; UNROLL:       vector.memcheck:
403; UNROLL-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]]
404; UNROLL-NEXT:    [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]]
405; UNROLL-NEXT:    [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]]
406; UNROLL-NEXT:    [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]]
407; UNROLL-NEXT:    [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]]
408; UNROLL-NEXT:    [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]]
409; UNROLL-NEXT:    [[BOUND0:%.*]] = icmp ult float* [[SCEVGEP]], [[SCEVGEP6]]
410; UNROLL-NEXT:    [[BOUND1:%.*]] = icmp ult float* [[SCEVGEP4]], [[SCEVGEP2]]
411; UNROLL-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
412; UNROLL-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
413; UNROLL:       vector.ph:
414; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -4
415; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i64 0
416; UNROLL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
417; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <2 x float> poison, float [[B]], i64 0
418; UNROLL-NEXT:    [[BROADCAST_SPLAT12:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT11]], <2 x float> poison, <2 x i32> zeroinitializer
419; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
420; UNROLL:       vector.body:
421; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
422; UNROLL-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], [[OFFSET]]
423; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]]
424; UNROLL-NEXT:    [[TMP4:%.*]] = bitcast float* [[TMP3]] to <2 x float>*
425; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP4]], align 4, !alias.scope !4, !noalias !7
426; UNROLL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 2
427; UNROLL-NEXT:    [[TMP6:%.*]] = bitcast float* [[TMP5]] to <2 x float>*
428; UNROLL-NEXT:    [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP6]], align 4, !alias.scope !4, !noalias !7
429; UNROLL-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], [[OFFSET2]]
430; UNROLL-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP7]]
431; UNROLL-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <2 x float>*
432; UNROLL-NEXT:    [[WIDE_LOAD9:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4, !alias.scope !7
433; UNROLL-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 2
434; UNROLL-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP10]] to <2 x float>*
435; UNROLL-NEXT:    [[WIDE_LOAD10:%.*]] = load <2 x float>, <2 x float>* [[TMP11]], align 4, !alias.scope !7
436; UNROLL-NEXT:    [[TMP12:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD9]]
437; UNROLL-NEXT:    [[TMP13:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT12]], [[WIDE_LOAD10]]
438; UNROLL-NEXT:    [[TMP14:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP12]]
439; UNROLL-NEXT:    [[TMP15:%.*]] = fadd fast <2 x float> [[WIDE_LOAD8]], [[TMP13]]
440; UNROLL-NEXT:    [[TMP16:%.*]] = bitcast float* [[TMP3]] to <2 x float>*
441; UNROLL-NEXT:    store <2 x float> [[TMP14]], <2 x float>* [[TMP16]], align 4, !alias.scope !4, !noalias !7
442; UNROLL-NEXT:    [[TMP17:%.*]] = bitcast float* [[TMP5]] to <2 x float>*
443; UNROLL-NEXT:    store <2 x float> [[TMP15]], <2 x float>* [[TMP17]], align 4, !alias.scope !4, !noalias !7
444; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
445; UNROLL-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
446; UNROLL-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
447; UNROLL:       middle.block:
448; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
449; UNROLL-NEXT:    br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
450; UNROLL:       scalar.ph:
451; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
452; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
453; UNROLL:       for.body:
454; UNROLL-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
455; UNROLL-NEXT:    [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]]
456; UNROLL-NEXT:    [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]]
457; UNROLL-NEXT:    [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4
458; UNROLL-NEXT:    [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]]
459; UNROLL-NEXT:    [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]]
460; UNROLL-NEXT:    [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4
461; UNROLL-NEXT:    [[M:%.*]] = fmul fast float [[L2]], [[B]]
462; UNROLL-NEXT:    [[AD:%.*]] = fadd fast float [[L1]], [[M]]
463; UNROLL-NEXT:    store float [[AD]], float* [[ARR_IDX]], align 4
464; UNROLL-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
465; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
466; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
467; UNROLL:       loopexit:
468; UNROLL-NEXT:    ret void
469;
470; UNROLL-NO-IC-LABEL: @scalar_use(
471; UNROLL-NO-IC-NEXT:  entry:
472; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
473; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
474; UNROLL-NO-IC:       vector.memcheck:
475; UNROLL-NO-IC-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]]
476; UNROLL-NO-IC-NEXT:    [[SCEVGEP1:%.*]] = bitcast float* [[SCEVGEP]] to i8*
477; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]]
478; UNROLL-NO-IC-NEXT:    [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]]
479; UNROLL-NO-IC-NEXT:    [[SCEVGEP23:%.*]] = bitcast float* [[SCEVGEP2]] to i8*
480; UNROLL-NO-IC-NEXT:    [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]]
481; UNROLL-NO-IC-NEXT:    [[SCEVGEP45:%.*]] = bitcast float* [[SCEVGEP4]] to i8*
482; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]]
483; UNROLL-NO-IC-NEXT:    [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]]
484; UNROLL-NO-IC-NEXT:    [[SCEVGEP67:%.*]] = bitcast float* [[SCEVGEP6]] to i8*
485; UNROLL-NO-IC-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]]
486; UNROLL-NO-IC-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]]
487; UNROLL-NO-IC-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
488; UNROLL-NO-IC-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
489; UNROLL-NO-IC:       vector.ph:
490; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
491; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
492; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[B:%.*]], i32 0
493; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
494; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <2 x float> poison, float [[B]], i32 0
495; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT12:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT11]], <2 x float> poison, <2 x i32> zeroinitializer
496; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
497; UNROLL-NO-IC:       vector.body:
498; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
499; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 0
500; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 2
501; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[TMP2]], [[OFFSET]]
502; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i64 [[TMP3]], [[OFFSET]]
503; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]]
504; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]]
505; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i32 0
506; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <2 x float>*
507; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP9]], align 4, !alias.scope !4, !noalias !7
508; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP6]], i32 2
509; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP10]] to <2 x float>*
510; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD8:%.*]] = load <2 x float>, <2 x float>* [[TMP11]], align 4, !alias.scope !4, !noalias !7
511; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = add i64 [[TMP2]], [[OFFSET2]]
512; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = add i64 [[TMP3]], [[OFFSET2]]
513; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
514; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP13]]
515; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 0
516; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = bitcast float* [[TMP16]] to <2 x float>*
517; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD9:%.*]] = load <2 x float>, <2 x float>* [[TMP17]], align 4, !alias.scope !7
518; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 2
519; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = bitcast float* [[TMP18]] to <2 x float>*
520; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD10:%.*]] = load <2 x float>, <2 x float>* [[TMP19]], align 4, !alias.scope !7
521; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD9]]
522; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = fmul fast <2 x float> [[BROADCAST_SPLAT12]], [[WIDE_LOAD10]]
523; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = fadd fast <2 x float> [[WIDE_LOAD]], [[TMP20]]
524; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = fadd fast <2 x float> [[WIDE_LOAD8]], [[TMP21]]
525; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = bitcast float* [[TMP8]] to <2 x float>*
526; UNROLL-NO-IC-NEXT:    store <2 x float> [[TMP22]], <2 x float>* [[TMP24]], align 4, !alias.scope !4, !noalias !7
527; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = bitcast float* [[TMP10]] to <2 x float>*
528; UNROLL-NO-IC-NEXT:    store <2 x float> [[TMP23]], <2 x float>* [[TMP25]], align 4, !alias.scope !4, !noalias !7
529; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
530; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
531; UNROLL-NO-IC-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
532; UNROLL-NO-IC:       middle.block:
533; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
534; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
535; UNROLL-NO-IC:       scalar.ph:
536; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
537; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
538; UNROLL-NO-IC:       for.body:
539; UNROLL-NO-IC-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
540; UNROLL-NO-IC-NEXT:    [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]]
541; UNROLL-NO-IC-NEXT:    [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]]
542; UNROLL-NO-IC-NEXT:    [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4
543; UNROLL-NO-IC-NEXT:    [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]]
544; UNROLL-NO-IC-NEXT:    [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]]
545; UNROLL-NO-IC-NEXT:    [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4
546; UNROLL-NO-IC-NEXT:    [[M:%.*]] = fmul fast float [[B]], [[L2]]
547; UNROLL-NO-IC-NEXT:    [[AD:%.*]] = fadd fast float [[L1]], [[M]]
548; UNROLL-NO-IC-NEXT:    store float [[AD]], float* [[ARR_IDX]], align 4
549; UNROLL-NO-IC-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
550; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
551; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
552; UNROLL-NO-IC:       loopexit:
553; UNROLL-NO-IC-NEXT:    ret void
554;
555; INTERLEAVE-LABEL: @scalar_use(
556; INTERLEAVE-NEXT:  entry:
557; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
558; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
559; INTERLEAVE:       vector.memcheck:
560; INTERLEAVE-NEXT:    [[SCEVGEP:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[OFFSET:%.*]]
561; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add i64 [[N]], [[OFFSET]]
562; INTERLEAVE-NEXT:    [[SCEVGEP2:%.*]] = getelementptr float, float* [[A]], i64 [[TMP0]]
563; INTERLEAVE-NEXT:    [[SCEVGEP4:%.*]] = getelementptr float, float* [[A]], i64 [[OFFSET2:%.*]]
564; INTERLEAVE-NEXT:    [[TMP1:%.*]] = add i64 [[N]], [[OFFSET2]]
565; INTERLEAVE-NEXT:    [[SCEVGEP6:%.*]] = getelementptr float, float* [[A]], i64 [[TMP1]]
566; INTERLEAVE-NEXT:    [[BOUND0:%.*]] = icmp ult float* [[SCEVGEP]], [[SCEVGEP6]]
567; INTERLEAVE-NEXT:    [[BOUND1:%.*]] = icmp ult float* [[SCEVGEP4]], [[SCEVGEP2]]
568; INTERLEAVE-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
569; INTERLEAVE-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
570; INTERLEAVE:       vector.ph:
571; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -8
572; INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
573; INTERLEAVE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
574; INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT11:%.*]] = insertelement <4 x float> poison, float [[B]], i64 0
575; INTERLEAVE-NEXT:    [[BROADCAST_SPLAT12:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT11]], <4 x float> poison, <4 x i32> zeroinitializer
576; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
577; INTERLEAVE:       vector.body:
578; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
579; INTERLEAVE-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], [[OFFSET]]
580; INTERLEAVE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP2]]
581; INTERLEAVE-NEXT:    [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>*
582; INTERLEAVE-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4, !alias.scope !4, !noalias !7
583; INTERLEAVE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[TMP3]], i64 4
584; INTERLEAVE-NEXT:    [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
585; INTERLEAVE-NEXT:    [[WIDE_LOAD8:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4, !alias.scope !4, !noalias !7
586; INTERLEAVE-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], [[OFFSET2]]
587; INTERLEAVE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP7]]
588; INTERLEAVE-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
589; INTERLEAVE-NEXT:    [[WIDE_LOAD9:%.*]] = load <4 x float>, <4 x float>* [[TMP9]], align 4, !alias.scope !7
590; INTERLEAVE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 4
591; INTERLEAVE-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>*
592; INTERLEAVE-NEXT:    [[WIDE_LOAD10:%.*]] = load <4 x float>, <4 x float>* [[TMP11]], align 4, !alias.scope !7
593; INTERLEAVE-NEXT:    [[TMP12:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT]], [[WIDE_LOAD9]]
594; INTERLEAVE-NEXT:    [[TMP13:%.*]] = fmul fast <4 x float> [[BROADCAST_SPLAT12]], [[WIDE_LOAD10]]
595; INTERLEAVE-NEXT:    [[TMP14:%.*]] = fadd fast <4 x float> [[WIDE_LOAD]], [[TMP12]]
596; INTERLEAVE-NEXT:    [[TMP15:%.*]] = fadd fast <4 x float> [[WIDE_LOAD8]], [[TMP13]]
597; INTERLEAVE-NEXT:    [[TMP16:%.*]] = bitcast float* [[TMP3]] to <4 x float>*
598; INTERLEAVE-NEXT:    store <4 x float> [[TMP14]], <4 x float>* [[TMP16]], align 4, !alias.scope !4, !noalias !7
599; INTERLEAVE-NEXT:    [[TMP17:%.*]] = bitcast float* [[TMP5]] to <4 x float>*
600; INTERLEAVE-NEXT:    store <4 x float> [[TMP15]], <4 x float>* [[TMP17]], align 4, !alias.scope !4, !noalias !7
601; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
602; INTERLEAVE-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
603; INTERLEAVE-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
604; INTERLEAVE:       middle.block:
605; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
606; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
607; INTERLEAVE:       scalar.ph:
608; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
609; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
610; INTERLEAVE:       for.body:
611; INTERLEAVE-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_BODY]] ]
612; INTERLEAVE-NEXT:    [[IND_SUM:%.*]] = add i64 [[IV]], [[OFFSET]]
613; INTERLEAVE-NEXT:    [[ARR_IDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM]]
614; INTERLEAVE-NEXT:    [[L1:%.*]] = load float, float* [[ARR_IDX]], align 4
615; INTERLEAVE-NEXT:    [[IND_SUM2:%.*]] = add i64 [[IV]], [[OFFSET2]]
616; INTERLEAVE-NEXT:    [[ARR_IDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IND_SUM2]]
617; INTERLEAVE-NEXT:    [[L2:%.*]] = load float, float* [[ARR_IDX2]], align 4
618; INTERLEAVE-NEXT:    [[M:%.*]] = fmul fast float [[L2]], [[B]]
619; INTERLEAVE-NEXT:    [[AD:%.*]] = fadd fast float [[L1]], [[M]]
620; INTERLEAVE-NEXT:    store float [[AD]], float* [[ARR_IDX]], align 4
621; INTERLEAVE-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
622; INTERLEAVE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
623; INTERLEAVE-NEXT:    br i1 [[EXITCOND]], label [[LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
624; INTERLEAVE:       loopexit:
625; INTERLEAVE-NEXT:    ret void
626;
627entry:
628  br label %for.body
629
630for.body:
631  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.body ]
632  %ind.sum = add i64 %iv, %offset
633  %arr.idx = getelementptr inbounds float, float* %a, i64 %ind.sum
634  %l1 = load float, float* %arr.idx, align 4
635  %ind.sum2 = add i64 %iv, %offset2
636  %arr.idx2 = getelementptr inbounds float, float* %a, i64 %ind.sum2
637  %l2 = load float, float* %arr.idx2, align 4
638  %m = fmul fast float %b, %l2
639  %ad = fadd fast float %l1, %m
640  store float %ad, float* %arr.idx, align 4
641  %iv.next = add nuw nsw i64 %iv, 1
642  %exitcond = icmp eq i64 %iv.next, %n
643  br i1 %exitcond, label %loopexit, label %for.body
644
645loopexit:
646  ret void
647}
648
649; Make sure we don't create a vector induction phi node that is unused.
650; Scalarize the step vectors instead.
651;
652; for (int i = 0; i < n; ++i)
653;   sum += a[i];
654;
655;
656;
657;
658
659define i64 @scalarize_induction_variable_01(i64 *%a, i64 %n) {
; The comment lines below are the expected vectorizer output for this function,
; one group per FileCheck prefix used by this file. They are autogenerated
; (see the NOTE on the first line of the file); regenerate them rather than
; editing by hand.
660; CHECK-LABEL: @scalarize_induction_variable_01(
661; CHECK-NEXT:  entry:
662; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
663; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
664; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
665; CHECK:       vector.ph:
666; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2
667; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
668; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
669; CHECK:       vector.body:
670; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
671; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
672; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
673; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP0]]
674; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP1]], i32 0
675; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <2 x i64>*
676; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
677; CHECK-NEXT:    [[TMP4]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
678; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
679; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
680; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
681; CHECK:       middle.block:
682; CHECK-NEXT:    [[TMP6:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP4]])
683; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
684; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
685; CHECK:       scalar.ph:
686; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
687; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
688; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
689; CHECK:       for.body:
690; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
691; CHECK-NEXT:    [[SUM:%.*]] = phi i64 [ [[TMP9:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
692; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]]
693; CHECK-NEXT:    [[TMP8:%.*]] = load i64, i64* [[TMP7]], align 8
694; CHECK-NEXT:    [[TMP9]] = add i64 [[TMP8]], [[SUM]]
695; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
696; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
697; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]]
698; CHECK:       for.end:
699; CHECK-NEXT:    [[TMP10:%.*]] = phi i64 [ [[TMP9]], [[FOR_BODY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
700; CHECK-NEXT:    ret i64 [[TMP10]]
701;
702; IND-LABEL: @scalarize_induction_variable_01(
703; IND-NEXT:  entry:
704; IND-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
705; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
706; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
707; IND:       vector.ph:
708; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
709; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
710; IND:       vector.body:
711; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
712; IND-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
713; IND-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
714; IND-NEXT:    [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>*
715; IND-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
716; IND-NEXT:    [[TMP2]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
717; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
718; IND-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
719; IND-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
720; IND:       middle.block:
721; IND-NEXT:    [[TMP4:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP2]])
722; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
723; IND-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
724; IND:       scalar.ph:
725; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
726; IND-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP4]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
727; IND-NEXT:    br label [[FOR_BODY:%.*]]
728; IND:       for.body:
729; IND-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
730; IND-NEXT:    [[SUM:%.*]] = phi i64 [ [[TMP7:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
731; IND-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]]
732; IND-NEXT:    [[TMP6:%.*]] = load i64, i64* [[TMP5]], align 8
733; IND-NEXT:    [[TMP7]] = add i64 [[TMP6]], [[SUM]]
734; IND-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
735; IND-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
736; IND-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]]
737; IND:       for.end:
738; IND-NEXT:    [[TMP8:%.*]] = phi i64 [ [[TMP7]], [[FOR_BODY]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
739; IND-NEXT:    ret i64 [[TMP8]]
740;
741; UNROLL-LABEL: @scalarize_induction_variable_01(
742; UNROLL-NEXT:  entry:
743; UNROLL-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
744; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
745; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
746; UNROLL:       vector.ph:
747; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
748; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
749; UNROLL:       vector.body:
750; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
751; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
752; UNROLL-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
753; UNROLL-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
754; UNROLL-NEXT:    [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <2 x i64>*
755; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP1]], align 8
756; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 2
757; UNROLL-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <2 x i64>*
758; UNROLL-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP3]], align 8
759; UNROLL-NEXT:    [[TMP4]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
760; UNROLL-NEXT:    [[TMP5]] = add <2 x i64> [[WIDE_LOAD2]], [[VEC_PHI1]]
761; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
762; UNROLL-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
763; UNROLL-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
764; UNROLL:       middle.block:
765; UNROLL-NEXT:    [[BIN_RDX:%.*]] = add <2 x i64> [[TMP5]], [[TMP4]]
766; UNROLL-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
767; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
768; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
769; UNROLL:       scalar.ph:
770; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
771; UNROLL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
772; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
773; UNROLL:       for.body:
774; UNROLL-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
775; UNROLL-NEXT:    [[SUM:%.*]] = phi i64 [ [[TMP10:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
776; UNROLL-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]]
777; UNROLL-NEXT:    [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8
778; UNROLL-NEXT:    [[TMP10]] = add i64 [[TMP9]], [[SUM]]
779; UNROLL-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
780; UNROLL-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
781; UNROLL-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]]
782; UNROLL:       for.end:
783; UNROLL-NEXT:    [[TMP11:%.*]] = phi i64 [ [[TMP10]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
784; UNROLL-NEXT:    ret i64 [[TMP11]]
785;
786; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_01(
787; UNROLL-NO-IC-NEXT:  entry:
788; UNROLL-NO-IC-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
789; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
790; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
791; UNROLL-NO-IC:       vector.ph:
792; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 4
793; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
794; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
795; UNROLL-NO-IC:       vector.body:
796; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
797; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
798; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
799; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
800; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
801; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[TMP0]]
802; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[TMP1]]
803; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, i64* [[TMP2]], i32 0
804; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <2 x i64>*
805; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i64>, <2 x i64>* [[TMP5]], align 8
806; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i64, i64* [[TMP2]], i32 2
807; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = bitcast i64* [[TMP6]] to <2 x i64>*
808; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD2:%.*]] = load <2 x i64>, <2 x i64>* [[TMP7]], align 8
809; UNROLL-NO-IC-NEXT:    [[TMP8]] = add <2 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
810; UNROLL-NO-IC-NEXT:    [[TMP9]] = add <2 x i64> [[WIDE_LOAD2]], [[VEC_PHI1]]
811; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
812; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
813; UNROLL-NO-IC-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
814; UNROLL-NO-IC:       middle.block:
815; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = add <2 x i64> [[TMP9]], [[TMP8]]
816; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
817; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
818; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
819; UNROLL-NO-IC:       scalar.ph:
820; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
821; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
822; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
823; UNROLL-NO-IC:       for.body:
824; UNROLL-NO-IC-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
825; UNROLL-NO-IC-NEXT:    [[SUM:%.*]] = phi i64 [ [[TMP14:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
826; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]]
827; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = load i64, i64* [[TMP12]], align 8
828; UNROLL-NO-IC-NEXT:    [[TMP14]] = add i64 [[TMP13]], [[SUM]]
829; UNROLL-NO-IC-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
830; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
831; UNROLL-NO-IC-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]]
832; UNROLL-NO-IC:       for.end:
833; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = phi i64 [ [[TMP14]], [[FOR_BODY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
834; UNROLL-NO-IC-NEXT:    ret i64 [[TMP15]]
835;
836; INTERLEAVE-LABEL: @scalarize_induction_variable_01(
837; INTERLEAVE-NEXT:  entry:
838; INTERLEAVE-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
839; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8
840; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
841; INTERLEAVE:       vector.ph:
842; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775800
843; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
844; INTERLEAVE:       vector.body:
845; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
846; INTERLEAVE-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
847; INTERLEAVE-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
848; INTERLEAVE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i64, i64* [[A:%.*]], i64 [[INDEX]]
849; INTERLEAVE-NEXT:    [[TMP1:%.*]] = bitcast i64* [[TMP0]] to <4 x i64>*
850; INTERLEAVE-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i64>, <4 x i64>* [[TMP1]], align 8
851; INTERLEAVE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i64, i64* [[TMP0]], i64 4
852; INTERLEAVE-NEXT:    [[TMP3:%.*]] = bitcast i64* [[TMP2]] to <4 x i64>*
853; INTERLEAVE-NEXT:    [[WIDE_LOAD2:%.*]] = load <4 x i64>, <4 x i64>* [[TMP3]], align 8
854; INTERLEAVE-NEXT:    [[TMP4]] = add <4 x i64> [[WIDE_LOAD]], [[VEC_PHI]]
855; INTERLEAVE-NEXT:    [[TMP5]] = add <4 x i64> [[WIDE_LOAD2]], [[VEC_PHI1]]
856; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
857; INTERLEAVE-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
858; INTERLEAVE-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
859; INTERLEAVE:       middle.block:
860; INTERLEAVE-NEXT:    [[BIN_RDX:%.*]] = add <4 x i64> [[TMP5]], [[TMP4]]
861; INTERLEAVE-NEXT:    [[TMP7:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[BIN_RDX]])
862; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
863; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
864; INTERLEAVE:       scalar.ph:
865; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
866; INTERLEAVE-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
867; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
868; INTERLEAVE:       for.body:
869; INTERLEAVE-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
870; INTERLEAVE-NEXT:    [[SUM:%.*]] = phi i64 [ [[TMP10:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
871; INTERLEAVE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[A]], i64 [[I]]
872; INTERLEAVE-NEXT:    [[TMP9:%.*]] = load i64, i64* [[TMP8]], align 8
873; INTERLEAVE-NEXT:    [[TMP10]] = add i64 [[TMP9]], [[SUM]]
874; INTERLEAVE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
875; INTERLEAVE-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
876; INTERLEAVE-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]]
877; INTERLEAVE:       for.end:
878; INTERLEAVE-NEXT:    [[TMP11:%.*]] = phi i64 [ [[TMP10]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
879; INTERLEAVE-NEXT:    ret i64 [[TMP11]]
880;
; Input IR handed to the vectorizer: a single-block loop that adds the i64
; elements a[i] into a running total and returns that total.
; Control branches straight from entry into for.body and the exit test sits at
; the bottom of the block, so the body executes once before %cond is evaluated.
881entry:
882  br label %for.body
883
884for.body:
; %i is the element index: 0 on entry, %i.next on the back edge.
885  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
; %sum is the running total: 0 on entry, the updated total %2 on the back edge.
886  %sum = phi i64 [ %2, %for.body ], [ 0, %entry ]
; Load a[i]; %i is used only for this address computation and the loop
; counter update below.
887  %0 = getelementptr inbounds i64, i64* %a, i64 %i
888  %1 = load i64, i64* %0, align 8
889  %2 = add i64 %1, %sum
890  %i.next = add nuw nsw i64 %i, 1
; Signed comparison of the incremented index against %n decides whether to
; take the back edge.
891  %cond = icmp slt i64 %i.next, %n
892  br i1 %cond, label %for.body, label %for.end
893
894for.end:
; Single-incoming phi that carries the final total out of the loop.
895  %3  = phi i64 [ %2, %for.body ]
896  ret i64 %3
897}
898
899; Make sure we scalarize the step vectors used for the pointer arithmetic. We
900; can't easily simplify vectorized step vectors.
901;
902; float s = 0;
903; for (int i = 0; i < n; i += 8)
904;   s += (a[i] + b[i] + 1.0f);
905;
906;
907;
908;
909
910define float @scalarize_induction_variable_02(float* %a, float* %b, i64 %n) {
911; CHECK-LABEL: @scalarize_induction_variable_02(
912; CHECK-NEXT:  entry:
913; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
914; CHECK-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
915; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
916; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
917; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
918; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
919; CHECK:       vector.ph:
920; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
921; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
922; CHECK-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 8
923; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
924; CHECK:       vector.body:
925; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
926; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
927; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
928; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
929; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
930; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP3]]
931; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]]
932; CHECK-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
933; CHECK-NEXT:    [[TMP8:%.*]] = load float, float* [[TMP6]], align 4
934; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i32 0
935; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[TMP8]], i32 1
936; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP3]]
937; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP4]]
938; CHECK-NEXT:    [[TMP13:%.*]] = load float, float* [[TMP11]], align 4
939; CHECK-NEXT:    [[TMP14:%.*]] = load float, float* [[TMP12]], align 4
940; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> poison, float [[TMP13]], i32 0
941; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <2 x float> [[TMP15]], float [[TMP14]], i32 1
942; CHECK-NEXT:    [[TMP17:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00>
943; CHECK-NEXT:    [[TMP18:%.*]] = fadd fast <2 x float> [[TMP17]], [[TMP10]]
944; CHECK-NEXT:    [[TMP19]] = fadd fast <2 x float> [[TMP18]], [[TMP16]]
945; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
946; CHECK-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
947; CHECK-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
948; CHECK:       middle.block:
949; CHECK-NEXT:    [[TMP21:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[TMP19]])
950; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
951; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
952; CHECK:       scalar.ph:
953; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
954; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
955; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
956; CHECK:       for.body:
957; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
958; CHECK-NEXT:    [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ]
959; CHECK-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
960; CHECK-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP22]], align 4
961; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]]
962; CHECK-NEXT:    [[TMP25:%.*]] = load float, float* [[TMP24]], align 4
963; CHECK-NEXT:    [[TMP26:%.*]] = fadd fast float [[S]], 1.000000e+00
964; CHECK-NEXT:    [[TMP27:%.*]] = fadd fast float [[TMP26]], [[TMP23]]
965; CHECK-NEXT:    [[TMP28]] = fadd fast float [[TMP27]], [[TMP25]]
966; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 8
967; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
968; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]]
969; CHECK:       for.end:
970; CHECK-NEXT:    [[S_LCSSA:%.*]] = phi float [ [[TMP28]], [[FOR_BODY]] ], [ [[TMP21]], [[MIDDLE_BLOCK]] ]
971; CHECK-NEXT:    ret float [[S_LCSSA]]
972;
973; IND-LABEL: @scalarize_induction_variable_02(
974; IND-NEXT:  entry:
975; IND-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
976; IND-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
977; IND-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
978; IND-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
979; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 8
980; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
981; IND:       vector.ph:
982; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387902
983; IND-NEXT:    [[IND_END:%.*]] = shl i64 [[N_VEC]], 3
984; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
985; IND:       vector.body:
986; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
987; IND-NEXT:    [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
988; IND-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
989; IND-NEXT:    [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 8
990; IND-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[OFFSET_IDX]]
991; IND-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP3]]
992; IND-NEXT:    [[TMP6:%.*]] = load float, float* [[TMP4]], align 4
993; IND-NEXT:    [[TMP7:%.*]] = load float, float* [[TMP5]], align 4
994; IND-NEXT:    [[TMP8:%.*]] = insertelement <2 x float> poison, float [[TMP6]], i64 0
995; IND-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> [[TMP8]], float [[TMP7]], i64 1
996; IND-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[OFFSET_IDX]]
997; IND-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP3]]
998; IND-NEXT:    [[TMP12:%.*]] = load float, float* [[TMP10]], align 4
999; IND-NEXT:    [[TMP13:%.*]] = load float, float* [[TMP11]], align 4
1000; IND-NEXT:    [[TMP14:%.*]] = insertelement <2 x float> poison, float [[TMP12]], i64 0
1001; IND-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[TMP13]], i64 1
1002; IND-NEXT:    [[TMP16:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00>
1003; IND-NEXT:    [[TMP17:%.*]] = fadd fast <2 x float> [[TMP16]], [[TMP9]]
1004; IND-NEXT:    [[TMP18]] = fadd fast <2 x float> [[TMP17]], [[TMP15]]
1005; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1006; IND-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1007; IND-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
1008; IND:       middle.block:
1009; IND-NEXT:    [[TMP20:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[TMP18]])
1010; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1011; IND-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1012; IND:       scalar.ph:
1013; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1014; IND-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP20]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
1015; IND-NEXT:    br label [[FOR_BODY:%.*]]
1016; IND:       for.body:
1017; IND-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
1018; IND-NEXT:    [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ]
1019; IND-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
1020; IND-NEXT:    [[TMP22:%.*]] = load float, float* [[TMP21]], align 4
1021; IND-NEXT:    [[TMP23:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]]
1022; IND-NEXT:    [[TMP24:%.*]] = load float, float* [[TMP23]], align 4
1023; IND-NEXT:    [[TMP25:%.*]] = fadd fast float [[S]], 1.000000e+00
1024; IND-NEXT:    [[TMP26:%.*]] = fadd fast float [[TMP25]], [[TMP22]]
1025; IND-NEXT:    [[TMP27]] = fadd fast float [[TMP26]], [[TMP24]]
1026; IND-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 8
1027; IND-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1028; IND-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]]
1029; IND:       for.end:
1030; IND-NEXT:    [[S_LCSSA:%.*]] = phi float [ [[TMP27]], [[FOR_BODY]] ], [ [[TMP20]], [[MIDDLE_BLOCK]] ]
1031; IND-NEXT:    ret float [[S_LCSSA]]
1032;
1033; UNROLL-LABEL: @scalarize_induction_variable_02(
1034; UNROLL-NEXT:  entry:
1035; UNROLL-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
1036; UNROLL-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
1037; UNROLL-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
1038; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1039; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 24
1040; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1041; UNROLL:       vector.ph:
1042; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 4611686018427387900
1043; UNROLL-NEXT:    [[IND_END:%.*]] = shl i64 [[N_VEC]], 3
1044; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
1045; UNROLL:       vector.body:
1046; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1047; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ]
1048; UNROLL-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
1049; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
1050; UNROLL-NEXT:    [[TMP3:%.*]] = or i64 [[OFFSET_IDX]], 8
1051; UNROLL-NEXT:    [[TMP4:%.*]] = or i64 [[OFFSET_IDX]], 16
1052; UNROLL-NEXT:    [[TMP5:%.*]] = or i64 [[OFFSET_IDX]], 24
1053; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[OFFSET_IDX]]
1054; UNROLL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP3]]
1055; UNROLL-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]]
1056; UNROLL-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]]
1057; UNROLL-NEXT:    [[TMP10:%.*]] = load float, float* [[TMP6]], align 4
1058; UNROLL-NEXT:    [[TMP11:%.*]] = load float, float* [[TMP7]], align 4
1059; UNROLL-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> poison, float [[TMP10]], i64 0
1060; UNROLL-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[TMP11]], i64 1
1061; UNROLL-NEXT:    [[TMP14:%.*]] = load float, float* [[TMP8]], align 4
1062; UNROLL-NEXT:    [[TMP15:%.*]] = load float, float* [[TMP9]], align 4
1063; UNROLL-NEXT:    [[TMP16:%.*]] = insertelement <2 x float> poison, float [[TMP14]], i64 0
1064; UNROLL-NEXT:    [[TMP17:%.*]] = insertelement <2 x float> [[TMP16]], float [[TMP15]], i64 1
1065; UNROLL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[OFFSET_IDX]]
1066; UNROLL-NEXT:    [[TMP19:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP3]]
1067; UNROLL-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP4]]
1068; UNROLL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP5]]
1069; UNROLL-NEXT:    [[TMP22:%.*]] = load float, float* [[TMP18]], align 4
1070; UNROLL-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP19]], align 4
1071; UNROLL-NEXT:    [[TMP24:%.*]] = insertelement <2 x float> poison, float [[TMP22]], i64 0
1072; UNROLL-NEXT:    [[TMP25:%.*]] = insertelement <2 x float> [[TMP24]], float [[TMP23]], i64 1
1073; UNROLL-NEXT:    [[TMP26:%.*]] = load float, float* [[TMP20]], align 4
1074; UNROLL-NEXT:    [[TMP27:%.*]] = load float, float* [[TMP21]], align 4
1075; UNROLL-NEXT:    [[TMP28:%.*]] = insertelement <2 x float> poison, float [[TMP26]], i64 0
1076; UNROLL-NEXT:    [[TMP29:%.*]] = insertelement <2 x float> [[TMP28]], float [[TMP27]], i64 1
1077; UNROLL-NEXT:    [[TMP30:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00>
1078; UNROLL-NEXT:    [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], <float 1.000000e+00, float 1.000000e+00>
1079; UNROLL-NEXT:    [[TMP32:%.*]] = fadd fast <2 x float> [[TMP30]], [[TMP13]]
1080; UNROLL-NEXT:    [[TMP33:%.*]] = fadd fast <2 x float> [[TMP31]], [[TMP17]]
1081; UNROLL-NEXT:    [[TMP34]] = fadd fast <2 x float> [[TMP32]], [[TMP25]]
1082; UNROLL-NEXT:    [[TMP35]] = fadd fast <2 x float> [[TMP33]], [[TMP29]]
1083; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1084; UNROLL-NEXT:    [[TMP36:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1085; UNROLL-NEXT:    br i1 [[TMP36]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
1086; UNROLL:       middle.block:
1087; UNROLL-NEXT:    [[BIN_RDX:%.*]] = fadd fast <2 x float> [[TMP35]], [[TMP34]]
1088; UNROLL-NEXT:    [[TMP37:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[BIN_RDX]])
1089; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1090; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1091; UNROLL:       scalar.ph:
1092; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1093; UNROLL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP37]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
1094; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
1095; UNROLL:       for.body:
1096; UNROLL-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
1097; UNROLL-NEXT:    [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP44:%.*]], [[FOR_BODY]] ]
1098; UNROLL-NEXT:    [[TMP38:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
1099; UNROLL-NEXT:    [[TMP39:%.*]] = load float, float* [[TMP38]], align 4
1100; UNROLL-NEXT:    [[TMP40:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]]
1101; UNROLL-NEXT:    [[TMP41:%.*]] = load float, float* [[TMP40]], align 4
1102; UNROLL-NEXT:    [[TMP42:%.*]] = fadd fast float [[S]], 1.000000e+00
1103; UNROLL-NEXT:    [[TMP43:%.*]] = fadd fast float [[TMP42]], [[TMP39]]
1104; UNROLL-NEXT:    [[TMP44]] = fadd fast float [[TMP43]], [[TMP41]]
1105; UNROLL-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 8
1106; UNROLL-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1107; UNROLL-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]]
1108; UNROLL:       for.end:
1109; UNROLL-NEXT:    [[S_LCSSA:%.*]] = phi float [ [[TMP44]], [[FOR_BODY]] ], [ [[TMP37]], [[MIDDLE_BLOCK]] ]
1110; UNROLL-NEXT:    ret float [[S_LCSSA]]
1111;
1112; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_02(
1113; UNROLL-NO-IC-NEXT:  entry:
1114; UNROLL-NO-IC-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
1115; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
1116; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
1117; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1118; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
1119; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1120; UNROLL-NO-IC:       vector.ph:
1121; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
1122; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
1123; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 8
1124; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
1125; UNROLL-NO-IC:       vector.body:
1126; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1127; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP35:%.*]], [[VECTOR_BODY]] ]
1128; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP36:%.*]], [[VECTOR_BODY]] ]
1129; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 8
1130; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
1131; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 8
1132; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 16
1133; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 24
1134; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[TMP3]]
1135; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP4]]
1136; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]]
1137; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP6]]
1138; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = load float, float* [[TMP7]], align 4
1139; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = load float, float* [[TMP8]], align 4
1140; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> poison, float [[TMP11]], i32 0
1141; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = insertelement <2 x float> [[TMP13]], float [[TMP12]], i32 1
1142; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = load float, float* [[TMP9]], align 4
1143; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = load float, float* [[TMP10]], align 4
1144; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = insertelement <2 x float> poison, float [[TMP15]], i32 0
1145; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = insertelement <2 x float> [[TMP17]], float [[TMP16]], i32 1
1146; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[TMP3]]
1147; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP4]]
1148; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP5]]
1149; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP6]]
1150; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP19]], align 4
1151; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = load float, float* [[TMP20]], align 4
1152; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = insertelement <2 x float> poison, float [[TMP23]], i32 0
1153; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = insertelement <2 x float> [[TMP25]], float [[TMP24]], i32 1
1154; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = load float, float* [[TMP21]], align 4
1155; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = load float, float* [[TMP22]], align 4
1156; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = insertelement <2 x float> poison, float [[TMP27]], i32 0
1157; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = insertelement <2 x float> [[TMP29]], float [[TMP28]], i32 1
1158; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = fadd fast <2 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00>
1159; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = fadd fast <2 x float> [[VEC_PHI1]], <float 1.000000e+00, float 1.000000e+00>
1160; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = fadd fast <2 x float> [[TMP31]], [[TMP14]]
1161; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = fadd fast <2 x float> [[TMP32]], [[TMP18]]
1162; UNROLL-NO-IC-NEXT:    [[TMP35]] = fadd fast <2 x float> [[TMP33]], [[TMP26]]
1163; UNROLL-NO-IC-NEXT:    [[TMP36]] = fadd fast <2 x float> [[TMP34]], [[TMP30]]
1164; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1165; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1166; UNROLL-NO-IC-NEXT:    br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
1167; UNROLL-NO-IC:       middle.block:
1168; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = fadd fast <2 x float> [[TMP36]], [[TMP35]]
1169; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = call fast float @llvm.vector.reduce.fadd.v2f32(float -0.000000e+00, <2 x float> [[BIN_RDX]])
1170; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1171; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1172; UNROLL-NO-IC:       scalar.ph:
1173; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1174; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ 0.000000e+00, [[ENTRY]] ], [ [[TMP38]], [[MIDDLE_BLOCK]] ]
1175; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
1176; UNROLL-NO-IC:       for.body:
1177; UNROLL-NO-IC-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
1178; UNROLL-NO-IC-NEXT:    [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP45:%.*]], [[FOR_BODY]] ]
1179; UNROLL-NO-IC-NEXT:    [[TMP39:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
1180; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = load float, float* [[TMP39]], align 4
1181; UNROLL-NO-IC-NEXT:    [[TMP41:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]]
1182; UNROLL-NO-IC-NEXT:    [[TMP42:%.*]] = load float, float* [[TMP41]], align 4
1183; UNROLL-NO-IC-NEXT:    [[TMP43:%.*]] = fadd fast float [[S]], 1.000000e+00
1184; UNROLL-NO-IC-NEXT:    [[TMP44:%.*]] = fadd fast float [[TMP43]], [[TMP40]]
1185; UNROLL-NO-IC-NEXT:    [[TMP45]] = fadd fast float [[TMP44]], [[TMP42]]
1186; UNROLL-NO-IC-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 8
1187; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1188; UNROLL-NO-IC-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP14:![0-9]+]]
1189; UNROLL-NO-IC:       for.end:
1190; UNROLL-NO-IC-NEXT:    [[S_LCSSA:%.*]] = phi float [ [[TMP45]], [[FOR_BODY]] ], [ [[TMP38]], [[MIDDLE_BLOCK]] ]
1191; UNROLL-NO-IC-NEXT:    ret float [[S_LCSSA]]
1192;
1193; INTERLEAVE-LABEL: @scalarize_induction_variable_02(
1194; INTERLEAVE-NEXT:  entry:
1195; INTERLEAVE-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 8)
1196; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add nsw i64 [[SMAX]], -1
1197; INTERLEAVE-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 3
1198; INTERLEAVE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1199; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 64
1200; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1201; INTERLEAVE:       vector.ph:
1202; INTERLEAVE-NEXT:    [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 7
1203; INTERLEAVE-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
1204; INTERLEAVE-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i64 8, i64 [[N_MOD_VF]]
1205; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[TMP4]]
1206; INTERLEAVE-NEXT:    [[IND_END:%.*]] = shl i64 [[N_VEC]], 3
1207; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
1208; INTERLEAVE:       vector.body:
1209; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1210; INTERLEAVE-NEXT:    [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
1211; INTERLEAVE-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
1212; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = shl i64 [[INDEX]], 3
1213; INTERLEAVE-NEXT:    [[TMP5:%.*]] = or i64 [[OFFSET_IDX]], 32
1214; INTERLEAVE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[OFFSET_IDX]]
1215; INTERLEAVE-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP5]]
1216; INTERLEAVE-NEXT:    [[TMP8:%.*]] = bitcast float* [[TMP6]] to <32 x float>*
1217; INTERLEAVE-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP7]] to <32 x float>*
1218; INTERLEAVE-NEXT:    [[WIDE_VEC:%.*]] = load <32 x float>, <32 x float>* [[TMP8]], align 4
1219; INTERLEAVE-NEXT:    [[WIDE_VEC2:%.*]] = load <32 x float>, <32 x float>* [[TMP9]], align 4
1220; INTERLEAVE-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <32 x float> [[WIDE_VEC]], <32 x float> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
1221; INTERLEAVE-NEXT:    [[STRIDED_VEC3:%.*]] = shufflevector <32 x float> [[WIDE_VEC2]], <32 x float> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
1222; INTERLEAVE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[OFFSET_IDX]]
1223; INTERLEAVE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[TMP5]]
1224; INTERLEAVE-NEXT:    [[TMP12:%.*]] = bitcast float* [[TMP10]] to <32 x float>*
1225; INTERLEAVE-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP11]] to <32 x float>*
1226; INTERLEAVE-NEXT:    [[WIDE_VEC4:%.*]] = load <32 x float>, <32 x float>* [[TMP12]], align 4
1227; INTERLEAVE-NEXT:    [[WIDE_VEC5:%.*]] = load <32 x float>, <32 x float>* [[TMP13]], align 4
1228; INTERLEAVE-NEXT:    [[STRIDED_VEC6:%.*]] = shufflevector <32 x float> [[WIDE_VEC4]], <32 x float> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
1229; INTERLEAVE-NEXT:    [[STRIDED_VEC7:%.*]] = shufflevector <32 x float> [[WIDE_VEC5]], <32 x float> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
1230; INTERLEAVE-NEXT:    [[TMP14:%.*]] = fadd fast <4 x float> [[VEC_PHI]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
1231; INTERLEAVE-NEXT:    [[TMP15:%.*]] = fadd fast <4 x float> [[VEC_PHI1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
1232; INTERLEAVE-NEXT:    [[TMP16:%.*]] = fadd fast <4 x float> [[TMP14]], [[STRIDED_VEC]]
1233; INTERLEAVE-NEXT:    [[TMP17:%.*]] = fadd fast <4 x float> [[TMP15]], [[STRIDED_VEC3]]
1234; INTERLEAVE-NEXT:    [[TMP18]] = fadd fast <4 x float> [[TMP16]], [[STRIDED_VEC6]]
1235; INTERLEAVE-NEXT:    [[TMP19]] = fadd fast <4 x float> [[TMP17]], [[STRIDED_VEC7]]
1236; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1237; INTERLEAVE-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1238; INTERLEAVE-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
1239; INTERLEAVE:       middle.block:
1240; INTERLEAVE-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP19]], [[TMP18]]
1241; INTERLEAVE-NEXT:    [[TMP21:%.*]] = call fast float @llvm.vector.reduce.fadd.v4f32(float -0.000000e+00, <4 x float> [[BIN_RDX]])
1242; INTERLEAVE-NEXT:    br label [[SCALAR_PH]]
1243; INTERLEAVE:       scalar.ph:
1244; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1245; INTERLEAVE-NEXT:    [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP21]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
1246; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
1247; INTERLEAVE:       for.body:
1248; INTERLEAVE-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_BODY]] ]
1249; INTERLEAVE-NEXT:    [[S:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ]
1250; INTERLEAVE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
1251; INTERLEAVE-NEXT:    [[TMP23:%.*]] = load float, float* [[TMP22]], align 4
1252; INTERLEAVE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[I]]
1253; INTERLEAVE-NEXT:    [[TMP25:%.*]] = load float, float* [[TMP24]], align 4
1254; INTERLEAVE-NEXT:    [[TMP26:%.*]] = fadd fast float [[S]], 1.000000e+00
1255; INTERLEAVE-NEXT:    [[TMP27:%.*]] = fadd fast float [[TMP26]], [[TMP23]]
1256; INTERLEAVE-NEXT:    [[TMP28]] = fadd fast float [[TMP27]], [[TMP25]]
1257; INTERLEAVE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 8
1258; INTERLEAVE-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1259; INTERLEAVE-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP14:![0-9]+]]
1260; INTERLEAVE:       for.end:
1261; INTERLEAVE-NEXT:    ret float [[TMP28]]
1262;
; Scalar input loop for this test; the assertion lines above are autogenerated
; by utils/update_test_checks.py. The loop accumulates a[i] + b[i] + 1.0 into a
; float sum with fast-math adds while the index advances by 8 per iteration.
; The exit condition is evaluated after the body, so the body runs at least once.
1263entry:
1264  br label %for.body
1265
1266for.body:
  ; %i: element index, 0 on entry and %i.next on the back edge.
1267  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
  ; %s: running sum, 0.0 on entry and %6 on the back edge.
1268  %s = phi float [ 0.0, %entry ], [ %6, %for.body ]
  ; %1 = a[i]
1269  %0 = getelementptr inbounds float, float* %a, i64 %i
1270  %1 = load float, float* %0, align 4
  ; %3 = b[i]
1271  %2 = getelementptr inbounds float, float* %b, i64 %i
1272  %3 = load float, float* %2, align 4
  ; %6 = ((s + 1.0) + a[i]) + b[i]; all three adds carry the 'fast' flag.
1273  %4 = fadd fast float %s, 1.0
1274  %5 = fadd fast float %4, %1
1275  %6 = fadd fast float %5, %3
  ; Step the index by 8 elements, so both arrays are read with a stride of 8.
1276  %i.next = add nuw nsw i64 %i, 8
  ; Signed comparison: stay in the loop while the next index is below %n.
1277  %cond = icmp slt i64 %i.next, %n
1278  br i1 %cond, label %for.body, label %for.end
1279
1280for.end:
  ; Single-incoming phi that carries the final sum out of the loop.
1281  %s.lcssa = phi float [ %6, %for.body ]
1282  ret float %s.lcssa
1283}
1284
1285; Make sure we scalarize the step vectors used for the pointer arithmetic. We
1286; can't easily simplify vectorized step vectors. (Interleaved accesses.)
1287;
1288; for (int i = 0; i < n; ++i)
1289;   a[i].f ^= y;
1290;
1291
; Struct of two i32 fields; it is the pointee type of %p in
; @scalarize_induction_variable_03 below.
1292%pair.i32 = type { i32, i32 }
1293define void @scalarize_induction_variable_03(%pair.i32 *%p, i32 %y, i64 %n) {
1294; CHECK-LABEL: @scalarize_induction_variable_03(
1295; CHECK-NEXT:  entry:
1296; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1297; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
1298; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1299; CHECK:       vector.ph:
1300; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2
1301; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
1302; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
1303; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
1304; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1305; CHECK:       vector.body:
1306; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1307; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1308; CHECK-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
1309; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[TMP0]], i32 1
1310; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP1]], i32 1
1311; CHECK-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 8
1312; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 8
1313; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i32 0
1314; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP5]], i32 1
1315; CHECK-NEXT:    [[TMP8:%.*]] = xor <2 x i32> [[TMP7]], [[BROADCAST_SPLAT]]
1316; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[TMP8]], i32 0
1317; CHECK-NEXT:    store i32 [[TMP9]], i32* [[TMP2]], align 8
1318; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i32> [[TMP8]], i32 1
1319; CHECK-NEXT:    store i32 [[TMP10]], i32* [[TMP3]], align 8
1320; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1321; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1322; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
1323; CHECK:       middle.block:
1324; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
1325; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1326; CHECK:       scalar.ph:
1327; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1328; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1329; CHECK:       for.body:
1330; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1331; CHECK-NEXT:    [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1332; CHECK-NEXT:    [[TMP12:%.*]] = load i32, i32* [[F]], align 8
1333; CHECK-NEXT:    [[TMP13:%.*]] = xor i32 [[TMP12]], [[Y]]
1334; CHECK-NEXT:    store i32 [[TMP13]], i32* [[F]], align 8
1335; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1336; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1337; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP16:![0-9]+]]
1338; CHECK:       for.end:
1339; CHECK-NEXT:    ret void
1340;
1341; IND-LABEL: @scalarize_induction_variable_03(
1342; IND-NEXT:  entry:
1343; IND-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1344; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
1345; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1346; IND:       vector.ph:
1347; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
1348; IND-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0
1349; IND-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
1350; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
1351; IND:       vector.body:
1352; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1353; IND-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
1354; IND-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 1
1355; IND-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP0]], i32 1
1356; IND-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP1]], align 8
1357; IND-NEXT:    [[TMP4:%.*]] = load i32, i32* [[TMP2]], align 8
1358; IND-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP3]], i64 0
1359; IND-NEXT:    [[TMP6:%.*]] = insertelement <2 x i32> [[TMP5]], i32 [[TMP4]], i64 1
1360; IND-NEXT:    [[TMP7:%.*]] = xor <2 x i32> [[TMP6]], [[BROADCAST_SPLAT]]
1361; IND-NEXT:    [[TMP8:%.*]] = extractelement <2 x i32> [[TMP7]], i64 0
1362; IND-NEXT:    store i32 [[TMP8]], i32* [[TMP1]], align 8
1363; IND-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[TMP7]], i64 1
1364; IND-NEXT:    store i32 [[TMP9]], i32* [[TMP2]], align 8
1365; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1366; IND-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1367; IND-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
1368; IND:       middle.block:
1369; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
1370; IND-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1371; IND:       scalar.ph:
1372; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1373; IND-NEXT:    br label [[FOR_BODY:%.*]]
1374; IND:       for.body:
1375; IND-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1376; IND-NEXT:    [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1377; IND-NEXT:    [[TMP11:%.*]] = load i32, i32* [[F]], align 8
1378; IND-NEXT:    [[TMP12:%.*]] = xor i32 [[TMP11]], [[Y]]
1379; IND-NEXT:    store i32 [[TMP12]], i32* [[F]], align 8
1380; IND-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1381; IND-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1382; IND-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP16:![0-9]+]]
1383; IND:       for.end:
1384; IND-NEXT:    ret void
1385;
1386; UNROLL-LABEL: @scalarize_induction_variable_03(
1387; UNROLL-NEXT:  entry:
1388; UNROLL-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1389; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
1390; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1391; UNROLL:       vector.ph:
1392; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
1393; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i64 0
1394; UNROLL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
1395; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[Y]], i64 0
1396; UNROLL-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
1397; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
1398; UNROLL:       vector.body:
1399; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1400; UNROLL-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
1401; UNROLL-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
1402; UNROLL-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
1403; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 1
1404; UNROLL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP0]], i32 1
1405; UNROLL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP1]], i32 1
1406; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1
1407; UNROLL-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP3]], align 8
1408; UNROLL-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 8
1409; UNROLL-NEXT:    [[TMP9:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i64 0
1410; UNROLL-NEXT:    [[TMP10:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP8]], i64 1
1411; UNROLL-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP5]], align 8
1412; UNROLL-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP6]], align 8
1413; UNROLL-NEXT:    [[TMP13:%.*]] = insertelement <2 x i32> poison, i32 [[TMP11]], i64 0
1414; UNROLL-NEXT:    [[TMP14:%.*]] = insertelement <2 x i32> [[TMP13]], i32 [[TMP12]], i64 1
1415; UNROLL-NEXT:    [[TMP15:%.*]] = xor <2 x i32> [[TMP10]], [[BROADCAST_SPLAT]]
1416; UNROLL-NEXT:    [[TMP16:%.*]] = xor <2 x i32> [[TMP14]], [[BROADCAST_SPLAT2]]
1417; UNROLL-NEXT:    [[TMP17:%.*]] = extractelement <2 x i32> [[TMP15]], i64 0
1418; UNROLL-NEXT:    store i32 [[TMP17]], i32* [[TMP3]], align 8
1419; UNROLL-NEXT:    [[TMP18:%.*]] = extractelement <2 x i32> [[TMP15]], i64 1
1420; UNROLL-NEXT:    store i32 [[TMP18]], i32* [[TMP4]], align 8
1421; UNROLL-NEXT:    [[TMP19:%.*]] = extractelement <2 x i32> [[TMP16]], i64 0
1422; UNROLL-NEXT:    store i32 [[TMP19]], i32* [[TMP5]], align 8
1423; UNROLL-NEXT:    [[TMP20:%.*]] = extractelement <2 x i32> [[TMP16]], i64 1
1424; UNROLL-NEXT:    store i32 [[TMP20]], i32* [[TMP6]], align 8
1425; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1426; UNROLL-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1427; UNROLL-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
1428; UNROLL:       middle.block:
1429; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
1430; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1431; UNROLL:       scalar.ph:
1432; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1433; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
1434; UNROLL:       for.body:
1435; UNROLL-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1436; UNROLL-NEXT:    [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1437; UNROLL-NEXT:    [[TMP22:%.*]] = load i32, i32* [[F]], align 8
1438; UNROLL-NEXT:    [[TMP23:%.*]] = xor i32 [[TMP22]], [[Y]]
1439; UNROLL-NEXT:    store i32 [[TMP23]], i32* [[F]], align 8
1440; UNROLL-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1441; UNROLL-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1442; UNROLL-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP16:![0-9]+]]
1443; UNROLL:       for.end:
1444; UNROLL-NEXT:    ret void
1445;
1446; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_03(
1447; UNROLL-NO-IC-NEXT:  entry:
1448; UNROLL-NO-IC-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1449; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
1450; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1451; UNROLL-NO-IC:       vector.ph:
1452; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 4
1453; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
1454; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[Y:%.*]], i32 0
1455; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
1456; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <2 x i32> poison, i32 [[Y]], i32 0
1457; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT1]], <2 x i32> poison, <2 x i32> zeroinitializer
1458; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
1459; UNROLL-NO-IC:       vector.body:
1460; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1461; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
1462; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
1463; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
1464; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
1465; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[TMP0]], i32 1
1466; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP1]], i32 1
1467; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1
1468; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 1
1469; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = load i32, i32* [[TMP4]], align 8
1470; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = load i32, i32* [[TMP5]], align 8
1471; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = insertelement <2 x i32> poison, i32 [[TMP8]], i32 0
1472; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = insertelement <2 x i32> [[TMP10]], i32 [[TMP9]], i32 1
1473; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = load i32, i32* [[TMP6]], align 8
1474; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = load i32, i32* [[TMP7]], align 8
1475; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = insertelement <2 x i32> poison, i32 [[TMP12]], i32 0
1476; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = insertelement <2 x i32> [[TMP14]], i32 [[TMP13]], i32 1
1477; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = xor <2 x i32> [[TMP11]], [[BROADCAST_SPLAT]]
1478; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = xor <2 x i32> [[TMP15]], [[BROADCAST_SPLAT2]]
1479; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = extractelement <2 x i32> [[TMP16]], i32 0
1480; UNROLL-NO-IC-NEXT:    store i32 [[TMP18]], i32* [[TMP4]], align 8
1481; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = extractelement <2 x i32> [[TMP16]], i32 1
1482; UNROLL-NO-IC-NEXT:    store i32 [[TMP19]], i32* [[TMP5]], align 8
1483; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = extractelement <2 x i32> [[TMP17]], i32 0
1484; UNROLL-NO-IC-NEXT:    store i32 [[TMP20]], i32* [[TMP6]], align 8
1485; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = extractelement <2 x i32> [[TMP17]], i32 1
1486; UNROLL-NO-IC-NEXT:    store i32 [[TMP21]], i32* [[TMP7]], align 8
1487; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1488; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1489; UNROLL-NO-IC-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
1490; UNROLL-NO-IC:       middle.block:
1491; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
1492; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1493; UNROLL-NO-IC:       scalar.ph:
1494; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1495; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
1496; UNROLL-NO-IC:       for.body:
1497; UNROLL-NO-IC-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1498; UNROLL-NO-IC-NEXT:    [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1499; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = load i32, i32* [[F]], align 8
1500; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = xor i32 [[TMP23]], [[Y]]
1501; UNROLL-NO-IC-NEXT:    store i32 [[TMP24]], i32* [[F]], align 8
1502; UNROLL-NO-IC-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1503; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1504; UNROLL-NO-IC-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP16:![0-9]+]]
1505; UNROLL-NO-IC:       for.end:
1506; UNROLL-NO-IC-NEXT:    ret void
1507;
1508; INTERLEAVE-LABEL: @scalarize_induction_variable_03(
1509; INTERLEAVE-NEXT:  entry:
1510; INTERLEAVE-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1511; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 9
1512; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1513; INTERLEAVE:       vector.ph:
1514; INTERLEAVE-NEXT:    [[N_MOD_VF:%.*]] = and i64 [[SMAX]], 7
1515; INTERLEAVE-NEXT:    [[TMP0:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
1516; INTERLEAVE-NEXT:    [[TMP1:%.*]] = select i1 [[TMP0]], i64 8, i64 [[N_MOD_VF]]
1517; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[SMAX]], [[TMP1]]
1518; INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[Y:%.*]], i64 0
1519; INTERLEAVE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1520; INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[Y]], i64 0
1521; INTERLEAVE-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
1522; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
1523; INTERLEAVE:       vector.body:
1524; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1525; INTERLEAVE-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 1
1526; INTERLEAVE-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 2
1527; INTERLEAVE-NEXT:    [[TMP4:%.*]] = or i64 [[INDEX]], 3
1528; INTERLEAVE-NEXT:    [[TMP5:%.*]] = or i64 [[INDEX]], 4
1529; INTERLEAVE-NEXT:    [[TMP6:%.*]] = or i64 [[INDEX]], 5
1530; INTERLEAVE-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 6
1531; INTERLEAVE-NEXT:    [[TMP8:%.*]] = or i64 [[INDEX]], 7
1532; INTERLEAVE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 [[INDEX]], i32 1
1533; INTERLEAVE-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP2]], i32 1
1534; INTERLEAVE-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP3]], i32 1
1535; INTERLEAVE-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP4]], i32 1
1536; INTERLEAVE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 1
1537; INTERLEAVE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP6]], i32 1
1538; INTERLEAVE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP7]], i32 1
1539; INTERLEAVE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP8]], i32 1
1540; INTERLEAVE-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP9]] to <8 x i32>*
1541; INTERLEAVE-NEXT:    [[TMP18:%.*]] = bitcast i32* [[TMP13]] to <8 x i32>*
1542; INTERLEAVE-NEXT:    [[WIDE_VEC:%.*]] = load <8 x i32>, <8 x i32>* [[TMP17]], align 8
1543; INTERLEAVE-NEXT:    [[WIDE_VEC1:%.*]] = load <8 x i32>, <8 x i32>* [[TMP18]], align 8
1544; INTERLEAVE-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <8 x i32> [[WIDE_VEC]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1545; INTERLEAVE-NEXT:    [[STRIDED_VEC2:%.*]] = shufflevector <8 x i32> [[WIDE_VEC1]], <8 x i32> poison, <4 x i32> <i32 0, i32 2, i32 4, i32 6>
1546; INTERLEAVE-NEXT:    [[TMP19:%.*]] = xor <4 x i32> [[STRIDED_VEC]], [[BROADCAST_SPLAT]]
1547; INTERLEAVE-NEXT:    [[TMP20:%.*]] = xor <4 x i32> [[STRIDED_VEC2]], [[BROADCAST_SPLAT4]]
1548; INTERLEAVE-NEXT:    [[TMP21:%.*]] = extractelement <4 x i32> [[TMP19]], i64 0
1549; INTERLEAVE-NEXT:    store i32 [[TMP21]], i32* [[TMP9]], align 8
1550; INTERLEAVE-NEXT:    [[TMP22:%.*]] = extractelement <4 x i32> [[TMP19]], i64 1
1551; INTERLEAVE-NEXT:    store i32 [[TMP22]], i32* [[TMP10]], align 8
1552; INTERLEAVE-NEXT:    [[TMP23:%.*]] = extractelement <4 x i32> [[TMP19]], i64 2
1553; INTERLEAVE-NEXT:    store i32 [[TMP23]], i32* [[TMP11]], align 8
1554; INTERLEAVE-NEXT:    [[TMP24:%.*]] = extractelement <4 x i32> [[TMP19]], i64 3
1555; INTERLEAVE-NEXT:    store i32 [[TMP24]], i32* [[TMP12]], align 8
1556; INTERLEAVE-NEXT:    [[TMP25:%.*]] = extractelement <4 x i32> [[TMP20]], i64 0
1557; INTERLEAVE-NEXT:    store i32 [[TMP25]], i32* [[TMP13]], align 8
1558; INTERLEAVE-NEXT:    [[TMP26:%.*]] = extractelement <4 x i32> [[TMP20]], i64 1
1559; INTERLEAVE-NEXT:    store i32 [[TMP26]], i32* [[TMP14]], align 8
1560; INTERLEAVE-NEXT:    [[TMP27:%.*]] = extractelement <4 x i32> [[TMP20]], i64 2
1561; INTERLEAVE-NEXT:    store i32 [[TMP27]], i32* [[TMP15]], align 8
1562; INTERLEAVE-NEXT:    [[TMP28:%.*]] = extractelement <4 x i32> [[TMP20]], i64 3
1563; INTERLEAVE-NEXT:    store i32 [[TMP28]], i32* [[TMP16]], align 8
1564; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1565; INTERLEAVE-NEXT:    [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1566; INTERLEAVE-NEXT:    br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
1567; INTERLEAVE:       middle.block:
1568; INTERLEAVE-NEXT:    br label [[SCALAR_PH]]
1569; INTERLEAVE:       scalar.ph:
1570; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1571; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
1572; INTERLEAVE:       for.body:
1573; INTERLEAVE-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1574; INTERLEAVE-NEXT:    [[F:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1575; INTERLEAVE-NEXT:    [[TMP30:%.*]] = load i32, i32* [[F]], align 8
1576; INTERLEAVE-NEXT:    [[TMP31:%.*]] = xor i32 [[TMP30]], [[Y]]
1577; INTERLEAVE-NEXT:    store i32 [[TMP31]], i32* [[F]], align 8
1578; INTERLEAVE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1579; INTERLEAVE-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1580; INTERLEAVE-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END:%.*]], !llvm.loop [[LOOP16:![0-9]+]]
1581; INTERLEAVE:       for.end:
1582; INTERLEAVE-NEXT:    ret void
1583;
1584entry:
1585  br label %for.body
1586
1587for.body:
1588  %i  = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
1589  %f = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
1590  %0 = load i32, i32* %f, align 8
1591  %1 = xor i32 %0, %y
1592  store i32 %1, i32* %f, align 8
1593  %i.next = add nuw nsw i64 %i, 1
1594  %cond = icmp slt i64 %i.next, %n
1595  br i1 %cond, label %for.body, label %for.end
1596
1597for.end:
1598  ret void
1599}
1600
1601; Make sure we scalarize the step vectors used for the pointer arithmetic. We
1602; can't easily simplify vectorized step vectors. (Interleaved accesses.)
1603;
1604; for (int i = 0; i < n; ++i)
1605;   p[i].f = a[i * 4];
1606;
1607
1608define void @scalarize_induction_variable_04(i32* %a, %pair.i32* %p, i32 %n) {
1609; CHECK-LABEL: @scalarize_induction_variable_04(
1610; CHECK-NEXT:  entry:
1611; CHECK-NEXT:    [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8*
1612; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
1613; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1614; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1615; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
1616; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1617; CHECK:       vector.memcheck:
1618; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1
1619; CHECK-NEXT:    [[SCEVGEP1:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
1620; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[N]], -1
1621; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1622; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
1623; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]]
1624; CHECK-NEXT:    [[SCEVGEP23:%.*]] = bitcast %pair.i32* [[SCEVGEP2]] to i8*
1625; CHECK-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2
1626; CHECK-NEXT:    [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], 1
1627; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP7]]
1628; CHECK-NEXT:    [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8*
1629; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP56]]
1630; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[A4]], [[SCEVGEP23]]
1631; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1632; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1633; CHECK:       vector.ph:
1634; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
1635; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
1636; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1637; CHECK:       vector.body:
1638; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1639; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1640; CHECK-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 0
1641; CHECK-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 1
1642; CHECK-NEXT:    [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1643; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i32 0
1644; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
1645; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i32 1
1646; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
1647; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 1, !alias.scope !17
1648; CHECK-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP14]], align 1, !alias.scope !17
1649; CHECK-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP8]], i32 1
1650; CHECK-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP9]], i32 1
1651; CHECK-NEXT:    store i32 [[TMP15]], i32* [[TMP17]], align 1, !alias.scope !20, !noalias !17
1652; CHECK-NEXT:    store i32 [[TMP16]], i32* [[TMP18]], align 1, !alias.scope !20, !noalias !17
1653; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1654; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1655; CHECK-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1656; CHECK-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1657; CHECK:       middle.block:
1658; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1659; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1660; CHECK:       scalar.ph:
1661; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1662; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1663; CHECK:       for.body:
1664; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1665; CHECK-NEXT:    [[TMP20:%.*]] = shl nsw i64 [[I]], 2
1666; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]]
1667; CHECK-NEXT:    [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 1
1668; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1669; CHECK-NEXT:    store i32 [[TMP22]], i32* [[TMP23]], align 1
1670; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1671; CHECK-NEXT:    [[TMP24:%.*]] = trunc i64 [[I_NEXT]] to i32
1672; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP24]], [[N]]
1673; CHECK-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
1674; CHECK:       for.end:
1675; CHECK-NEXT:    ret void
1676;
1677; IND-LABEL: @scalarize_induction_variable_04(
1678; IND-NEXT:  entry:
1679; IND-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
1680; IND-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1681; IND-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1682; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
1683; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1684; IND:       vector.memcheck:
1685; IND-NEXT:    [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1
1686; IND-NEXT:    [[TMP3:%.*]] = add i32 [[N]], -1
1687; IND-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1688; IND-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
1689; IND-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2
1690; IND-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], 1
1691; IND-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]]
1692; IND-NEXT:    [[BOUND0:%.*]] = icmp ult i32* [[SCEVGEP]], [[SCEVGEP5]]
1693; IND-NEXT:    [[TMP8:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 0
1694; IND-NEXT:    [[BOUND1:%.*]] = icmp ugt i32* [[TMP8]], [[A]]
1695; IND-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1696; IND-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1697; IND:       vector.ph:
1698; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
1699; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
1700; IND:       vector.body:
1701; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1702; IND-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1703; IND-NEXT:    [[TMP9:%.*]] = or i64 [[INDEX]], 1
1704; IND-NEXT:    [[TMP10:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1705; IND-NEXT:    [[TMP11:%.*]] = extractelement <2 x i64> [[TMP10]], i64 0
1706; IND-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP11]]
1707; IND-NEXT:    [[TMP13:%.*]] = extractelement <2 x i64> [[TMP10]], i64 1
1708; IND-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
1709; IND-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 1, !alias.scope !17
1710; IND-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP14]], align 1, !alias.scope !17
1711; IND-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1
1712; IND-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP9]], i32 1
1713; IND-NEXT:    store i32 [[TMP15]], i32* [[TMP17]], align 1, !alias.scope !20, !noalias !17
1714; IND-NEXT:    store i32 [[TMP16]], i32* [[TMP18]], align 1, !alias.scope !20, !noalias !17
1715; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1716; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1717; IND-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1718; IND-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1719; IND:       middle.block:
1720; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1721; IND-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1722; IND:       scalar.ph:
1723; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1724; IND-NEXT:    br label [[FOR_BODY:%.*]]
1725; IND:       for.body:
1726; IND-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1727; IND-NEXT:    [[TMP20:%.*]] = shl nsw i64 [[I]], 2
1728; IND-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]]
1729; IND-NEXT:    [[TMP22:%.*]] = load i32, i32* [[TMP21]], align 1
1730; IND-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1731; IND-NEXT:    store i32 [[TMP22]], i32* [[TMP23]], align 1
1732; IND-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1733; IND-NEXT:    [[TMP24:%.*]] = trunc i64 [[I_NEXT]] to i32
1734; IND-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP24]], [[N]]
1735; IND-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
1736; IND:       for.end:
1737; IND-NEXT:    ret void
1738;
1739; UNROLL-LABEL: @scalarize_induction_variable_04(
1740; UNROLL-NEXT:  entry:
1741; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
1742; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1743; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1744; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
1745; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1746; UNROLL:       vector.memcheck:
1747; UNROLL-NEXT:    [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1
1748; UNROLL-NEXT:    [[TMP3:%.*]] = add i32 [[N]], -1
1749; UNROLL-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1750; UNROLL-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
1751; UNROLL-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2
1752; UNROLL-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], 1
1753; UNROLL-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]]
1754; UNROLL-NEXT:    [[BOUND0:%.*]] = icmp ult i32* [[SCEVGEP]], [[SCEVGEP5]]
1755; UNROLL-NEXT:    [[TMP8:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 0
1756; UNROLL-NEXT:    [[BOUND1:%.*]] = icmp ugt i32* [[TMP8]], [[A]]
1757; UNROLL-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1758; UNROLL-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1759; UNROLL:       vector.ph:
1760; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
1761; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
1762; UNROLL:       vector.body:
1763; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1764; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1765; UNROLL-NEXT:    [[TMP9:%.*]] = or i64 [[INDEX]], 1
1766; UNROLL-NEXT:    [[TMP10:%.*]] = or i64 [[INDEX]], 2
1767; UNROLL-NEXT:    [[TMP11:%.*]] = or i64 [[INDEX]], 3
1768; UNROLL-NEXT:    [[TMP12:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1769; UNROLL-NEXT:    [[STEP_ADD:%.*]] = shl <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1770; UNROLL-NEXT:    [[TMP13:%.*]] = add <2 x i64> [[STEP_ADD]], <i64 8, i64 8>
1771; UNROLL-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i64 0
1772; UNROLL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP14]]
1773; UNROLL-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i64 1
1774; UNROLL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
1775; UNROLL-NEXT:    [[TMP18:%.*]] = extractelement <2 x i64> [[TMP13]], i64 0
1776; UNROLL-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
1777; UNROLL-NEXT:    [[TMP20:%.*]] = extractelement <2 x i64> [[TMP13]], i64 1
1778; UNROLL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]]
1779; UNROLL-NEXT:    [[TMP22:%.*]] = load i32, i32* [[TMP15]], align 1, !alias.scope !17
1780; UNROLL-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP17]], align 1, !alias.scope !17
1781; UNROLL-NEXT:    [[TMP24:%.*]] = load i32, i32* [[TMP19]], align 1, !alias.scope !17
1782; UNROLL-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP21]], align 1, !alias.scope !17
1783; UNROLL-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1
1784; UNROLL-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP9]], i32 1
1785; UNROLL-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP10]], i32 1
1786; UNROLL-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP11]], i32 1
1787; UNROLL-NEXT:    store i32 [[TMP22]], i32* [[TMP26]], align 1, !alias.scope !20, !noalias !17
1788; UNROLL-NEXT:    store i32 [[TMP23]], i32* [[TMP27]], align 1, !alias.scope !20, !noalias !17
1789; UNROLL-NEXT:    store i32 [[TMP24]], i32* [[TMP28]], align 1, !alias.scope !20, !noalias !17
1790; UNROLL-NEXT:    store i32 [[TMP25]], i32* [[TMP29]], align 1, !alias.scope !20, !noalias !17
1791; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1792; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
1793; UNROLL-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1794; UNROLL-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1795; UNROLL:       middle.block:
1796; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1797; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1798; UNROLL:       scalar.ph:
1799; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1800; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
1801; UNROLL:       for.body:
1802; UNROLL-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1803; UNROLL-NEXT:    [[TMP31:%.*]] = shl nsw i64 [[I]], 2
1804; UNROLL-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
1805; UNROLL-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 1
1806; UNROLL-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1807; UNROLL-NEXT:    store i32 [[TMP33]], i32* [[TMP34]], align 1
1808; UNROLL-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1809; UNROLL-NEXT:    [[TMP35:%.*]] = trunc i64 [[I_NEXT]] to i32
1810; UNROLL-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP35]], [[N]]
1811; UNROLL-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
1812; UNROLL:       for.end:
1813; UNROLL-NEXT:    ret void
1814;
1815; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_04(
1816; UNROLL-NO-IC-NEXT:  entry:
1817; UNROLL-NO-IC-NEXT:    [[A4:%.*]] = bitcast i32* [[A:%.*]] to i8*
1818; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
1819; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1820; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1821; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
1822; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1823; UNROLL-NO-IC:       vector.memcheck:
1824; UNROLL-NO-IC-NEXT:    [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1
1825; UNROLL-NO-IC-NEXT:    [[SCEVGEP1:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
1826; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i32 [[N]], -1
1827; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1828; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
1829; UNROLL-NO-IC-NEXT:    [[SCEVGEP2:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]]
1830; UNROLL-NO-IC-NEXT:    [[SCEVGEP23:%.*]] = bitcast %pair.i32* [[SCEVGEP2]] to i8*
1831; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2
1832; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add nuw nsw i64 [[TMP6]], 1
1833; UNROLL-NO-IC-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP7]]
1834; UNROLL-NO-IC-NEXT:    [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8*
1835; UNROLL-NO-IC-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP56]]
1836; UNROLL-NO-IC-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[A4]], [[SCEVGEP23]]
1837; UNROLL-NO-IC-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1838; UNROLL-NO-IC-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1839; UNROLL-NO-IC:       vector.ph:
1840; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
1841; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
1842; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
1843; UNROLL-NO-IC:       vector.body:
1844; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1845; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1846; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1847; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 0
1848; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = add i64 [[INDEX]], 1
1849; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 2
1850; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = add i64 [[INDEX]], 3
1851; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = shl nsw <2 x i64> [[VEC_IND]], <i64 2, i64 2>
1852; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = shl nsw <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
1853; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = extractelement <2 x i64> [[TMP12]], i32 0
1854; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP14]]
1855; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = extractelement <2 x i64> [[TMP12]], i32 1
1856; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
1857; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = extractelement <2 x i64> [[TMP13]], i32 0
1858; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
1859; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = extractelement <2 x i64> [[TMP13]], i32 1
1860; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]]
1861; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = load i32, i32* [[TMP15]], align 1, !alias.scope !17
1862; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = load i32, i32* [[TMP17]], align 1, !alias.scope !17
1863; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = load i32, i32* [[TMP19]], align 1, !alias.scope !17
1864; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = load i32, i32* [[TMP21]], align 1, !alias.scope !17
1865; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP8]], i32 1
1866; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP9]], i32 1
1867; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP10]], i32 1
1868; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP11]], i32 1
1869; UNROLL-NO-IC-NEXT:    store i32 [[TMP22]], i32* [[TMP26]], align 1, !alias.scope !20, !noalias !17
1870; UNROLL-NO-IC-NEXT:    store i32 [[TMP23]], i32* [[TMP27]], align 1, !alias.scope !20, !noalias !17
1871; UNROLL-NO-IC-NEXT:    store i32 [[TMP24]], i32* [[TMP28]], align 1, !alias.scope !20, !noalias !17
1872; UNROLL-NO-IC-NEXT:    store i32 [[TMP25]], i32* [[TMP29]], align 1, !alias.scope !20, !noalias !17
1873; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1874; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
1875; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1876; UNROLL-NO-IC-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1877; UNROLL-NO-IC:       middle.block:
1878; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1879; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1880; UNROLL-NO-IC:       scalar.ph:
1881; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1882; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
1883; UNROLL-NO-IC:       for.body:
1884; UNROLL-NO-IC-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1885; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = shl nsw i64 [[I]], 2
1886; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP31]]
1887; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 1
1888; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1889; UNROLL-NO-IC-NEXT:    store i32 [[TMP33]], i32* [[TMP34]], align 1
1890; UNROLL-NO-IC-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1891; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = trunc i64 [[I_NEXT]] to i32
1892; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP35]], [[N]]
1893; UNROLL-NO-IC-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
1894; UNROLL-NO-IC:       for.end:
1895; UNROLL-NO-IC-NEXT:    ret void
1896;
1897; INTERLEAVE-LABEL: @scalarize_induction_variable_04(
1898; INTERLEAVE-NEXT:  entry:
1899; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
1900; INTERLEAVE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1901; INTERLEAVE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1902; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
1903; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1904; INTERLEAVE:       vector.memcheck:
1905; INTERLEAVE-NEXT:    [[SCEVGEP:%.*]] = getelementptr [[PAIR_I32:%.*]], %pair.i32* [[P:%.*]], i64 0, i32 1
1906; INTERLEAVE-NEXT:    [[TMP3:%.*]] = add i32 [[N]], -1
1907; INTERLEAVE-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
1908; INTERLEAVE-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
1909; INTERLEAVE-NEXT:    [[TMP6:%.*]] = shl nuw nsw i64 [[TMP4]], 2
1910; INTERLEAVE-NEXT:    [[TMP7:%.*]] = or i64 [[TMP6]], 1
1911; INTERLEAVE-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A:%.*]], i64 [[TMP7]]
1912; INTERLEAVE-NEXT:    [[BOUND0:%.*]] = icmp ult i32* [[SCEVGEP]], [[SCEVGEP5]]
1913; INTERLEAVE-NEXT:    [[TMP8:%.*]] = getelementptr [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP5]], i32 0
1914; INTERLEAVE-NEXT:    [[BOUND1:%.*]] = icmp ugt i32* [[TMP8]], [[A]]
1915; INTERLEAVE-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1916; INTERLEAVE-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1917; INTERLEAVE:       vector.ph:
1918; INTERLEAVE-NEXT:    [[N_MOD_VF:%.*]] = and i64 [[TMP2]], 7
1919; INTERLEAVE-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[N_MOD_VF]], 0
1920; INTERLEAVE-NEXT:    [[TMP10:%.*]] = select i1 [[TMP9]], i64 8, i64 [[N_MOD_VF]]
1921; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = sub nsw i64 [[TMP2]], [[TMP10]]
1922; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
1923; INTERLEAVE:       vector.body:
1924; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1925; INTERLEAVE-NEXT:    [[TMP11:%.*]] = or i64 [[INDEX]], 1
1926; INTERLEAVE-NEXT:    [[TMP12:%.*]] = or i64 [[INDEX]], 2
1927; INTERLEAVE-NEXT:    [[TMP13:%.*]] = or i64 [[INDEX]], 3
1928; INTERLEAVE-NEXT:    [[TMP14:%.*]] = or i64 [[INDEX]], 4
1929; INTERLEAVE-NEXT:    [[TMP15:%.*]] = or i64 [[INDEX]], 5
1930; INTERLEAVE-NEXT:    [[TMP16:%.*]] = or i64 [[INDEX]], 6
1931; INTERLEAVE-NEXT:    [[TMP17:%.*]] = or i64 [[INDEX]], 7
1932; INTERLEAVE-NEXT:    [[TMP18:%.*]] = shl nsw i64 [[INDEX]], 2
1933; INTERLEAVE-NEXT:    [[TMP19:%.*]] = shl nsw i64 [[TMP14]], 2
1934; INTERLEAVE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
1935; INTERLEAVE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
1936; INTERLEAVE-NEXT:    [[TMP22:%.*]] = bitcast i32* [[TMP20]] to <16 x i32>*
1937; INTERLEAVE-NEXT:    [[TMP23:%.*]] = bitcast i32* [[TMP21]] to <16 x i32>*
1938; INTERLEAVE-NEXT:    [[WIDE_VEC:%.*]] = load <16 x i32>, <16 x i32>* [[TMP22]], align 1
1939; INTERLEAVE-NEXT:    [[WIDE_VEC7:%.*]] = load <16 x i32>, <16 x i32>* [[TMP23]], align 1
1940; INTERLEAVE-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[INDEX]], i32 1
1941; INTERLEAVE-NEXT:    [[TMP25:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP11]], i32 1
1942; INTERLEAVE-NEXT:    [[TMP26:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP12]], i32 1
1943; INTERLEAVE-NEXT:    [[TMP27:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP13]], i32 1
1944; INTERLEAVE-NEXT:    [[TMP28:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP14]], i32 1
1945; INTERLEAVE-NEXT:    [[TMP29:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP15]], i32 1
1946; INTERLEAVE-NEXT:    [[TMP30:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP16]], i32 1
1947; INTERLEAVE-NEXT:    [[TMP31:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[TMP17]], i32 1
1948; INTERLEAVE-NEXT:    [[TMP32:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 0
1949; INTERLEAVE-NEXT:    store i32 [[TMP32]], i32* [[TMP24]], align 1, !alias.scope !17, !noalias !20
1950; INTERLEAVE-NEXT:    [[TMP33:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 4
1951; INTERLEAVE-NEXT:    store i32 [[TMP33]], i32* [[TMP25]], align 1, !alias.scope !17, !noalias !20
1952; INTERLEAVE-NEXT:    [[TMP34:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 8
1953; INTERLEAVE-NEXT:    store i32 [[TMP34]], i32* [[TMP26]], align 1, !alias.scope !17, !noalias !20
1954; INTERLEAVE-NEXT:    [[TMP35:%.*]] = extractelement <16 x i32> [[WIDE_VEC]], i64 12
1955; INTERLEAVE-NEXT:    store i32 [[TMP35]], i32* [[TMP27]], align 1, !alias.scope !17, !noalias !20
1956; INTERLEAVE-NEXT:    [[TMP36:%.*]] = extractelement <16 x i32> [[WIDE_VEC7]], i64 0
1957; INTERLEAVE-NEXT:    store i32 [[TMP36]], i32* [[TMP28]], align 1, !alias.scope !17, !noalias !20
1958; INTERLEAVE-NEXT:    [[TMP37:%.*]] = extractelement <16 x i32> [[WIDE_VEC7]], i64 4
1959; INTERLEAVE-NEXT:    store i32 [[TMP37]], i32* [[TMP29]], align 1, !alias.scope !17, !noalias !20
1960; INTERLEAVE-NEXT:    [[TMP38:%.*]] = extractelement <16 x i32> [[WIDE_VEC7]], i64 8
1961; INTERLEAVE-NEXT:    store i32 [[TMP38]], i32* [[TMP30]], align 1, !alias.scope !17, !noalias !20
1962; INTERLEAVE-NEXT:    [[TMP39:%.*]] = extractelement <16 x i32> [[WIDE_VEC7]], i64 12
1963; INTERLEAVE-NEXT:    store i32 [[TMP39]], i32* [[TMP31]], align 1, !alias.scope !17, !noalias !20
1964; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1965; INTERLEAVE-NEXT:    [[TMP40:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1966; INTERLEAVE-NEXT:    br i1 [[TMP40]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
1967; INTERLEAVE:       middle.block:
1968; INTERLEAVE-NEXT:    br label [[SCALAR_PH]]
1969; INTERLEAVE:       scalar.ph:
1970; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ], [ 0, [[VECTOR_MEMCHECK]] ]
1971; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
1972; INTERLEAVE:       for.body:
1973; INTERLEAVE-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1974; INTERLEAVE-NEXT:    [[TMP41:%.*]] = shl nsw i64 [[I]], 2
1975; INTERLEAVE-NEXT:    [[TMP42:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP41]]
1976; INTERLEAVE-NEXT:    [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 1
1977; INTERLEAVE-NEXT:    [[TMP44:%.*]] = getelementptr inbounds [[PAIR_I32]], %pair.i32* [[P]], i64 [[I]], i32 1
1978; INTERLEAVE-NEXT:    store i32 [[TMP43]], i32* [[TMP44]], align 1
1979; INTERLEAVE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1980; INTERLEAVE-NEXT:    [[TMP45:%.*]] = trunc i64 [[I_NEXT]] to i32
1981; INTERLEAVE-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP45]], [[N]]
1982; INTERLEAVE-NEXT:    br i1 [[COND]], label [[FOR_END:%.*]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
1983; INTERLEAVE:       for.end:
1984; INTERLEAVE-NEXT:    ret void
1985;
1986entry:
1987  br label %for.body
1988
1989for.body:
1990  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry]
1991  %0 = shl nsw i64 %i, 2
1992  %1 = getelementptr inbounds i32, i32* %a, i64 %0
1993  %2 = load i32, i32* %1, align 1
1994  %3 = getelementptr inbounds %pair.i32, %pair.i32* %p, i64 %i, i32 1
1995  store i32 %2, i32* %3, align 1
1996  %i.next = add nuw nsw i64 %i, 1
1997  %4 = trunc i64 %i.next to i32
1998  %cond = icmp eq i32 %4, %n
1999  br i1 %cond, label %for.end, label %for.body
2000
2001for.end:
2002  ret void
2003}
2004
; PR30542. Ensure we generate all the scalar steps for the induction variable
; when its scalar form has both a uniform user (the getelementptr that feeds
; the load) and a non-uniform user (the udiv, which only executes when %c is set).
2008;
2009; int sum = 0;
2010; for (int i = 0; i < n; ++i) {
2011;   int x = a[i];
2012;   if (c)
2013;     x /= i;
2014;   sum += x;
2015; }
2016;
2017;
2018;
2019;
2020
2021define i32 @scalarize_induction_variable_05(i32* %a, i32 %x, i1 %c, i32 %n) {
2022; CHECK-LABEL: @scalarize_induction_variable_05(
2023; CHECK-NEXT:  entry:
2024; CHECK-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1)
2025; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 2
2026; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2027; CHECK:       vector.ph:
2028; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[SMAX]], 2
2029; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]]
2030; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0
2031; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
2032; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2033; CHECK:       vector.body:
2034; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ]
2035; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE2]] ]
2036; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[PRED_UDIV_CONTINUE2]] ]
2037; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
2038; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]]
2039; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
2040; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
2041; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
2042; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0
2043; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
2044; CHECK:       pred.udiv.if:
2045; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
2046; CHECK-NEXT:    [[TMP6:%.*]] = udiv i32 [[TMP5]], [[TMP0]]
2047; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP6]], i32 0
2048; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE]]
2049; CHECK:       pred.udiv.continue:
2050; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UDIV_IF]] ]
2051; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1
2052; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]]
2053; CHECK:       pred.udiv.if1:
2054; CHECK-NEXT:    [[TMP10:%.*]] = add i32 [[INDEX]], 1
2055; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
2056; CHECK-NEXT:    [[TMP12:%.*]] = udiv i32 [[TMP11]], [[TMP10]]
2057; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x i32> [[TMP8]], i32 [[TMP12]], i32 1
2058; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE2]]
2059; CHECK:       pred.udiv.continue2:
2060; CHECK-NEXT:    [[TMP14:%.*]] = phi <2 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF1]] ]
2061; CHECK-NEXT:    [[TMP15:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true>
2062; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP15]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP14]]
2063; CHECK-NEXT:    [[TMP16]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]]
2064; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
2065; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
2066; CHECK-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2067; CHECK-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2068; CHECK:       middle.block:
2069; CHECK-NEXT:    [[TMP18:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP16]])
2070; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]]
2071; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2072; CHECK:       scalar.ph:
2073; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2074; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
2075; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
2076; CHECK:       for.body:
2077; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
2078; CHECK-NEXT:    [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ]
2079; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I]]
2080; CHECK-NEXT:    [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4
2081; CHECK-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
2082; CHECK:       if.then:
2083; CHECK-NEXT:    [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]]
2084; CHECK-NEXT:    br label [[IF_END]]
2085; CHECK:       if.end:
2086; CHECK-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ]
2087; CHECK-NEXT:    [[VAR4]] = add i32 [[VAR3]], [[SUM]]
2088; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
2089; CHECK-NEXT:    [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
2090; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]]
2091; CHECK:       for.end:
2092; CHECK-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP18]], [[MIDDLE_BLOCK]] ]
2093; CHECK-NEXT:    ret i32 [[VAR5]]
2094;
2095; IND-LABEL: @scalarize_induction_variable_05(
2096; IND-NEXT:  entry:
2097; IND-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1)
2098; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 2
2099; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2100; IND:       vector.ph:
2101; IND-NEXT:    [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483646
2102; IND-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
2103; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
2104; IND:       vector.body:
2105; IND-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE2:%.*]] ]
2106; IND-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[PRED_UDIV_CONTINUE2]] ]
2107; IND-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
2108; IND-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
2109; IND-NEXT:    [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <2 x i32>*
2110; IND-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP2]], align 4
2111; IND-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
2112; IND:       pred.udiv.if:
2113; IND-NEXT:    [[TMP3:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 0
2114; IND-NEXT:    [[TMP4:%.*]] = udiv i32 [[TMP3]], [[INDEX]]
2115; IND-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> poison, i32 [[TMP4]], i64 0
2116; IND-NEXT:    br label [[PRED_UDIV_CONTINUE]]
2117; IND:       pred.udiv.continue:
2118; IND-NEXT:    [[TMP6:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ]
2119; IND-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF1:%.*]], label [[PRED_UDIV_CONTINUE2]]
2120; IND:       pred.udiv.if1:
2121; IND-NEXT:    [[TMP7:%.*]] = or i32 [[INDEX]], 1
2122; IND-NEXT:    [[TMP8:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 1
2123; IND-NEXT:    [[TMP9:%.*]] = udiv i32 [[TMP8]], [[TMP7]]
2124; IND-NEXT:    [[TMP10:%.*]] = insertelement <2 x i32> [[TMP6]], i32 [[TMP9]], i64 1
2125; IND-NEXT:    br label [[PRED_UDIV_CONTINUE2]]
2126; IND:       pred.udiv.continue2:
2127; IND-NEXT:    [[TMP11:%.*]] = phi <2 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF1]] ]
2128; IND-NEXT:    [[TMP12:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison>
2129; IND-NEXT:    [[TMP13:%.*]] = shufflevector <2 x i1> [[TMP12]], <2 x i1> poison, <2 x i32> zeroinitializer
2130; IND-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP13]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP11]]
2131; IND-NEXT:    [[TMP14]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]]
2132; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
2133; IND-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2134; IND-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2135; IND:       middle.block:
2136; IND-NEXT:    [[TMP16:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[TMP14]])
2137; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]]
2138; IND-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2139; IND:       scalar.ph:
2140; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2141; IND-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP16]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2142; IND-NEXT:    br label [[FOR_BODY:%.*]]
2143; IND:       for.body:
2144; IND-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
2145; IND-NEXT:    [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ]
2146; IND-NEXT:    [[TMP17:%.*]] = zext i32 [[I]] to i64
2147; IND-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]]
2148; IND-NEXT:    [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4
2149; IND-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
2150; IND:       if.then:
2151; IND-NEXT:    [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]]
2152; IND-NEXT:    br label [[IF_END]]
2153; IND:       if.end:
2154; IND-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ]
2155; IND-NEXT:    [[VAR4]] = add i32 [[VAR3]], [[SUM]]
2156; IND-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
2157; IND-NEXT:    [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
2158; IND-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]]
2159; IND:       for.end:
2160; IND-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
2161; IND-NEXT:    ret i32 [[VAR5]]
2162;
2163; UNROLL-LABEL: @scalarize_induction_variable_05(
2164; UNROLL-NEXT:  entry:
2165; UNROLL-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1)
2166; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 4
2167; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2168; UNROLL:       vector.ph:
2169; UNROLL-NEXT:    [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483644
2170; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i64 0
2171; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i64 0
2172; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
2173; UNROLL:       vector.body:
2174; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE11:%.*]] ]
2175; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_UDIV_CONTINUE11]] ]
2176; UNROLL-NEXT:    [[VEC_PHI2:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UDIV_CONTINUE11]] ]
2177; UNROLL-NEXT:    [[TMP0:%.*]] = or i32 [[INDEX]], 2
2178; UNROLL-NEXT:    [[TMP1:%.*]] = sext i32 [[INDEX]] to i64
2179; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
2180; UNROLL-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
2181; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP3]], align 4
2182; UNROLL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 2
2183; UNROLL-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
2184; UNROLL-NEXT:    [[WIDE_LOAD3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP5]], align 4
2185; UNROLL-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
2186; UNROLL:       pred.udiv.if:
2187; UNROLL-NEXT:    [[TMP6:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 0
2188; UNROLL-NEXT:    [[TMP7:%.*]] = udiv i32 [[TMP6]], [[INDEX]]
2189; UNROLL-NEXT:    [[TMP8:%.*]] = insertelement <2 x i32> poison, i32 [[TMP7]], i64 0
2190; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE]]
2191; UNROLL:       pred.udiv.continue:
2192; UNROLL-NEXT:    [[TMP9:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ]
2193; UNROLL-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
2194; UNROLL:       pred.udiv.if4:
2195; UNROLL-NEXT:    [[TMP10:%.*]] = or i32 [[INDEX]], 1
2196; UNROLL-NEXT:    [[TMP11:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i64 1
2197; UNROLL-NEXT:    [[TMP12:%.*]] = udiv i32 [[TMP11]], [[TMP10]]
2198; UNROLL-NEXT:    [[TMP13:%.*]] = insertelement <2 x i32> [[TMP9]], i32 [[TMP12]], i64 1
2199; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE5]]
2200; UNROLL:       pred.udiv.continue5:
2201; UNROLL-NEXT:    [[TMP14:%.*]] = phi <2 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF4]] ]
2202; UNROLL-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
2203; UNROLL:       pred.udiv.if8:
2204; UNROLL-NEXT:    [[TMP15:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i64 0
2205; UNROLL-NEXT:    [[TMP16:%.*]] = udiv i32 [[TMP15]], [[TMP0]]
2206; UNROLL-NEXT:    [[TMP17:%.*]] = insertelement <2 x i32> poison, i32 [[TMP16]], i64 0
2207; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE9]]
2208; UNROLL:       pred.udiv.continue9:
2209; UNROLL-NEXT:    [[TMP18:%.*]] = phi <2 x i32> [ poison, [[PRED_UDIV_CONTINUE5]] ], [ [[TMP17]], [[PRED_UDIV_IF8]] ]
2210; UNROLL-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11]]
2211; UNROLL:       pred.udiv.if10:
2212; UNROLL-NEXT:    [[TMP19:%.*]] = or i32 [[INDEX]], 3
2213; UNROLL-NEXT:    [[TMP20:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i64 1
2214; UNROLL-NEXT:    [[TMP21:%.*]] = udiv i32 [[TMP20]], [[TMP19]]
2215; UNROLL-NEXT:    [[TMP22:%.*]] = insertelement <2 x i32> [[TMP18]], i32 [[TMP21]], i64 1
2216; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE11]]
2217; UNROLL:       pred.udiv.continue11:
2218; UNROLL-NEXT:    [[TMP23:%.*]] = phi <2 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP22]], [[PRED_UDIV_IF10]] ]
2219; UNROLL-NEXT:    [[TMP24:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison>
2220; UNROLL-NEXT:    [[TMP25:%.*]] = shufflevector <2 x i1> [[TMP24]], <2 x i1> poison, <2 x i32> zeroinitializer
2221; UNROLL-NEXT:    [[TMP26:%.*]] = xor <2 x i1> [[BROADCAST_SPLATINSERT6]], <i1 true, i1 poison>
2222; UNROLL-NEXT:    [[TMP27:%.*]] = shufflevector <2 x i1> [[TMP26]], <2 x i1> poison, <2 x i32> zeroinitializer
2223; UNROLL-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP25]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP14]]
2224; UNROLL-NEXT:    [[PREDPHI12:%.*]] = select <2 x i1> [[TMP27]], <2 x i32> [[WIDE_LOAD3]], <2 x i32> [[TMP23]]
2225; UNROLL-NEXT:    [[TMP28]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]]
2226; UNROLL-NEXT:    [[TMP29]] = add <2 x i32> [[PREDPHI12]], [[VEC_PHI2]]
2227; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2228; UNROLL-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2229; UNROLL-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2230; UNROLL:       middle.block:
2231; UNROLL-NEXT:    [[BIN_RDX:%.*]] = add <2 x i32> [[TMP29]], [[TMP28]]
2232; UNROLL-NEXT:    [[TMP31:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]])
2233; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]]
2234; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2235; UNROLL:       scalar.ph:
2236; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2237; UNROLL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP31]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2238; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
2239; UNROLL:       for.body:
2240; UNROLL-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
2241; UNROLL-NEXT:    [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ]
2242; UNROLL-NEXT:    [[TMP32:%.*]] = zext i32 [[I]] to i64
2243; UNROLL-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP32]]
2244; UNROLL-NEXT:    [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4
2245; UNROLL-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
2246; UNROLL:       if.then:
2247; UNROLL-NEXT:    [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]]
2248; UNROLL-NEXT:    br label [[IF_END]]
2249; UNROLL:       if.end:
2250; UNROLL-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ]
2251; UNROLL-NEXT:    [[VAR4]] = add i32 [[VAR3]], [[SUM]]
2252; UNROLL-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
2253; UNROLL-NEXT:    [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
2254; UNROLL-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]]
2255; UNROLL:       for.end:
2256; UNROLL-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
2257; UNROLL-NEXT:    ret i32 [[VAR5]]
2258;
2259; UNROLL-NO-IC-LABEL: @scalarize_induction_variable_05(
2260; UNROLL-NO-IC-NEXT:  entry:
2261; UNROLL-NO-IC-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1)
2262; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 4
2263; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2264; UNROLL-NO-IC:       vector.ph:
2265; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[SMAX]], 4
2266; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[SMAX]], [[N_MOD_VF]]
2267; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i1> poison, i1 [[C:%.*]], i32 0
2268; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT]], <2 x i1> poison, <2 x i32> zeroinitializer
2269; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i1> poison, i1 [[C]], i32 0
2270; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x i1> [[BROADCAST_SPLATINSERT6]], <2 x i1> poison, <2 x i32> zeroinitializer
2271; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
2272; UNROLL-NO-IC:       vector.body:
2273; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE11:%.*]] ]
2274; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UDIV_CONTINUE11]] ]
2275; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP32:%.*]], [[PRED_UDIV_CONTINUE11]] ]
2276; UNROLL-NO-IC-NEXT:    [[VEC_PHI2:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP33:%.*]], [[PRED_UDIV_CONTINUE11]] ]
2277; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
2278; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
2279; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 2
2280; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]]
2281; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP1]]
2282; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
2283; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
2284; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x i32>, <2 x i32>* [[TMP5]], align 4
2285; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 2
2286; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>*
2287; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD3:%.*]] = load <2 x i32>, <2 x i32>* [[TMP7]], align 4
2288; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 0
2289; UNROLL-NO-IC-NEXT:    br i1 [[TMP8]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
2290; UNROLL-NO-IC:       pred.udiv.if:
2291; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 0
2292; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = udiv i32 [[TMP9]], [[TMP0]]
2293; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
2294; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE]]
2295; UNROLL-NO-IC:       pred.udiv.continue:
2296; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = phi <2 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP11]], [[PRED_UDIV_IF]] ]
2297; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT]], i32 1
2298; UNROLL-NO-IC-NEXT:    br i1 [[TMP13]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
2299; UNROLL-NO-IC:       pred.udiv.if4:
2300; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = add i32 [[INDEX]], 1
2301; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = extractelement <2 x i32> [[WIDE_LOAD]], i32 1
2302; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = udiv i32 [[TMP15]], [[TMP14]]
2303; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = insertelement <2 x i32> [[TMP12]], i32 [[TMP16]], i32 1
2304; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE5]]
2305; UNROLL-NO-IC:       pred.udiv.continue5:
2306; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = phi <2 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP17]], [[PRED_UDIV_IF4]] ]
2307; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT7]], i32 0
2308; UNROLL-NO-IC-NEXT:    br i1 [[TMP19]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
2309; UNROLL-NO-IC:       pred.udiv.if8:
2310; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 0
2311; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = udiv i32 [[TMP20]], [[TMP1]]
2312; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = insertelement <2 x i32> poison, i32 [[TMP21]], i32 0
2313; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE9]]
2314; UNROLL-NO-IC:       pred.udiv.continue9:
2315; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = phi <2 x i32> [ poison, [[PRED_UDIV_CONTINUE5]] ], [ [[TMP22]], [[PRED_UDIV_IF8]] ]
2316; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = extractelement <2 x i1> [[BROADCAST_SPLAT7]], i32 1
2317; UNROLL-NO-IC-NEXT:    br i1 [[TMP24]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11]]
2318; UNROLL-NO-IC:       pred.udiv.if10:
2319; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = add i32 [[INDEX]], 3
2320; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = extractelement <2 x i32> [[WIDE_LOAD3]], i32 1
2321; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = udiv i32 [[TMP26]], [[TMP25]]
2322; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = insertelement <2 x i32> [[TMP23]], i32 [[TMP27]], i32 1
2323; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE11]]
2324; UNROLL-NO-IC:       pred.udiv.continue11:
2325; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = phi <2 x i32> [ [[TMP23]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP28]], [[PRED_UDIV_IF10]] ]
2326; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT]], <i1 true, i1 true>
2327; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = xor <2 x i1> [[BROADCAST_SPLAT7]], <i1 true, i1 true>
2328; UNROLL-NO-IC-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP30]], <2 x i32> [[WIDE_LOAD]], <2 x i32> [[TMP18]]
2329; UNROLL-NO-IC-NEXT:    [[PREDPHI12:%.*]] = select <2 x i1> [[TMP31]], <2 x i32> [[WIDE_LOAD3]], <2 x i32> [[TMP29]]
2330; UNROLL-NO-IC-NEXT:    [[TMP32]] = add <2 x i32> [[PREDPHI]], [[VEC_PHI]]
2331; UNROLL-NO-IC-NEXT:    [[TMP33]] = add <2 x i32> [[PREDPHI12]], [[VEC_PHI2]]
2332; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2333; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
2334; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2335; UNROLL-NO-IC-NEXT:    br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2336; UNROLL-NO-IC:       middle.block:
2337; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = add <2 x i32> [[TMP33]], [[TMP32]]
2338; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = call i32 @llvm.vector.reduce.add.v2i32(<2 x i32> [[BIN_RDX]])
2339; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]]
2340; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2341; UNROLL-NO-IC:       scalar.ph:
2342; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2343; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ]
2344; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
2345; UNROLL-NO-IC:       for.body:
2346; UNROLL-NO-IC-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
2347; UNROLL-NO-IC-NEXT:    [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ]
2348; UNROLL-NO-IC-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[I]]
2349; UNROLL-NO-IC-NEXT:    [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4
2350; UNROLL-NO-IC-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
2351; UNROLL-NO-IC:       if.then:
2352; UNROLL-NO-IC-NEXT:    [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]]
2353; UNROLL-NO-IC-NEXT:    br label [[IF_END]]
2354; UNROLL-NO-IC:       if.end:
2355; UNROLL-NO-IC-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ]
2356; UNROLL-NO-IC-NEXT:    [[VAR4]] = add i32 [[VAR3]], [[SUM]]
2357; UNROLL-NO-IC-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
2358; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
2359; UNROLL-NO-IC-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]]
2360; UNROLL-NO-IC:       for.end:
2361; UNROLL-NO-IC-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP35]], [[MIDDLE_BLOCK]] ]
2362; UNROLL-NO-IC-NEXT:    ret i32 [[VAR5]]
2363;
2364; INTERLEAVE-LABEL: @scalarize_induction_variable_05(
2365; INTERLEAVE-NEXT:  entry:
2366; INTERLEAVE-NEXT:    [[SMAX:%.*]] = call i32 @llvm.smax.i32(i32 [[N:%.*]], i32 1)
2367; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[SMAX]], 8
2368; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2369; INTERLEAVE:       vector.ph:
2370; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i32 [[SMAX]], 2147483640
2371; INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i1> poison, i1 [[C:%.*]], i64 0
2372; INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT10:%.*]] = insertelement <4 x i1> poison, i1 [[C]], i64 0
2373; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
2374; INTERLEAVE:       vector.body:
2375; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE19:%.*]] ]
2376; INTERLEAVE-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_UDIV_CONTINUE19]] ]
2377; INTERLEAVE-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_UDIV_CONTINUE19]] ]
2378; INTERLEAVE-NEXT:    [[TMP0:%.*]] = or i32 [[INDEX]], 4
2379; INTERLEAVE-NEXT:    [[TMP1:%.*]] = sext i32 [[INDEX]] to i64
2380; INTERLEAVE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
2381; INTERLEAVE-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
2382; INTERLEAVE-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP3]], align 4
2383; INTERLEAVE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 4
2384; INTERLEAVE-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
2385; INTERLEAVE-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4
2386; INTERLEAVE-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
2387; INTERLEAVE:       pred.udiv.if:
2388; INTERLEAVE-NEXT:    [[TMP6:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 0
2389; INTERLEAVE-NEXT:    [[TMP7:%.*]] = udiv i32 [[TMP6]], [[INDEX]]
2390; INTERLEAVE-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
2391; INTERLEAVE-NEXT:    br label [[PRED_UDIV_CONTINUE]]
2392; INTERLEAVE:       pred.udiv.continue:
2393; INTERLEAVE-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ]
2394; INTERLEAVE-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
2395; INTERLEAVE:       pred.udiv.if4:
2396; INTERLEAVE-NEXT:    [[TMP10:%.*]] = or i32 [[INDEX]], 1
2397; INTERLEAVE-NEXT:    [[TMP11:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 1
2398; INTERLEAVE-NEXT:    [[TMP12:%.*]] = udiv i32 [[TMP11]], [[TMP10]]
2399; INTERLEAVE-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP12]], i64 1
2400; INTERLEAVE-NEXT:    br label [[PRED_UDIV_CONTINUE5]]
2401; INTERLEAVE:       pred.udiv.continue5:
2402; INTERLEAVE-NEXT:    [[TMP14:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF4]] ]
2403; INTERLEAVE-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
2404; INTERLEAVE:       pred.udiv.if6:
2405; INTERLEAVE-NEXT:    [[TMP15:%.*]] = or i32 [[INDEX]], 2
2406; INTERLEAVE-NEXT:    [[TMP16:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 2
2407; INTERLEAVE-NEXT:    [[TMP17:%.*]] = udiv i32 [[TMP16]], [[TMP15]]
2408; INTERLEAVE-NEXT:    [[TMP18:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP17]], i64 2
2409; INTERLEAVE-NEXT:    br label [[PRED_UDIV_CONTINUE7]]
2410; INTERLEAVE:       pred.udiv.continue7:
2411; INTERLEAVE-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP18]], [[PRED_UDIV_IF6]] ]
2412; INTERLEAVE-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
2413; INTERLEAVE:       pred.udiv.if8:
2414; INTERLEAVE-NEXT:    [[TMP20:%.*]] = or i32 [[INDEX]], 3
2415; INTERLEAVE-NEXT:    [[TMP21:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3
2416; INTERLEAVE-NEXT:    [[TMP22:%.*]] = udiv i32 [[TMP21]], [[TMP20]]
2417; INTERLEAVE-NEXT:    [[TMP23:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP22]], i64 3
2418; INTERLEAVE-NEXT:    br label [[PRED_UDIV_CONTINUE9]]
2419; INTERLEAVE:       pred.udiv.continue9:
2420; INTERLEAVE-NEXT:    [[TMP24:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP23]], [[PRED_UDIV_IF8]] ]
2421; INTERLEAVE-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]]
2422; INTERLEAVE:       pred.udiv.if12:
2423; INTERLEAVE-NEXT:    [[TMP25:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 0
2424; INTERLEAVE-NEXT:    [[TMP26:%.*]] = udiv i32 [[TMP25]], [[TMP0]]
2425; INTERLEAVE-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i64 0
2426; INTERLEAVE-NEXT:    br label [[PRED_UDIV_CONTINUE13]]
2427; INTERLEAVE:       pred.udiv.continue13:
2428; INTERLEAVE-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE9]] ], [ [[TMP27]], [[PRED_UDIV_IF12]] ]
2429; INTERLEAVE-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]]
2430; INTERLEAVE:       pred.udiv.if14:
2431; INTERLEAVE-NEXT:    [[TMP29:%.*]] = or i32 [[INDEX]], 5
2432; INTERLEAVE-NEXT:    [[TMP30:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 1
2433; INTERLEAVE-NEXT:    [[TMP31:%.*]] = udiv i32 [[TMP30]], [[TMP29]]
2434; INTERLEAVE-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i64 1
2435; INTERLEAVE-NEXT:    br label [[PRED_UDIV_CONTINUE15]]
2436; INTERLEAVE:       pred.udiv.continue15:
2437; INTERLEAVE-NEXT:    [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_UDIV_CONTINUE13]] ], [ [[TMP32]], [[PRED_UDIV_IF14]] ]
2438; INTERLEAVE-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]]
2439; INTERLEAVE:       pred.udiv.if16:
2440; INTERLEAVE-NEXT:    [[TMP34:%.*]] = or i32 [[INDEX]], 6
2441; INTERLEAVE-NEXT:    [[TMP35:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 2
2442; INTERLEAVE-NEXT:    [[TMP36:%.*]] = udiv i32 [[TMP35]], [[TMP34]]
2443; INTERLEAVE-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i64 2
2444; INTERLEAVE-NEXT:    br label [[PRED_UDIV_CONTINUE17]]
2445; INTERLEAVE:       pred.udiv.continue17:
2446; INTERLEAVE-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP37]], [[PRED_UDIV_IF16]] ]
2447; INTERLEAVE-NEXT:    br i1 [[C]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19]]
2448; INTERLEAVE:       pred.udiv.if18:
2449; INTERLEAVE-NEXT:    [[TMP39:%.*]] = or i32 [[INDEX]], 7
2450; INTERLEAVE-NEXT:    [[TMP40:%.*]] = extractelement <4 x i32> [[WIDE_LOAD3]], i64 3
2451; INTERLEAVE-NEXT:    [[TMP41:%.*]] = udiv i32 [[TMP40]], [[TMP39]]
2452; INTERLEAVE-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i64 3
2453; INTERLEAVE-NEXT:    br label [[PRED_UDIV_CONTINUE19]]
2454; INTERLEAVE:       pred.udiv.continue19:
2455; INTERLEAVE-NEXT:    [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP42]], [[PRED_UDIV_IF18]] ]
2456; INTERLEAVE-NEXT:    [[TMP44:%.*]] = xor <4 x i1> [[BROADCAST_SPLATINSERT]], <i1 true, i1 poison, i1 poison, i1 poison>
2457; INTERLEAVE-NEXT:    [[TMP45:%.*]] = shufflevector <4 x i1> [[TMP44]], <4 x i1> poison, <4 x i32> zeroinitializer
2458; INTERLEAVE-NEXT:    [[TMP46:%.*]] = xor <4 x i1> [[BROADCAST_SPLATINSERT10]], <i1 true, i1 poison, i1 poison, i1 poison>
2459; INTERLEAVE-NEXT:    [[TMP47:%.*]] = shufflevector <4 x i1> [[TMP46]], <4 x i1> poison, <4 x i32> zeroinitializer
2460; INTERLEAVE-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP45]], <4 x i32> [[WIDE_LOAD]], <4 x i32> [[TMP24]]
2461; INTERLEAVE-NEXT:    [[PREDPHI20:%.*]] = select <4 x i1> [[TMP47]], <4 x i32> [[WIDE_LOAD3]], <4 x i32> [[TMP43]]
2462; INTERLEAVE-NEXT:    [[TMP48]] = add <4 x i32> [[PREDPHI]], [[VEC_PHI]]
2463; INTERLEAVE-NEXT:    [[TMP49]] = add <4 x i32> [[PREDPHI20]], [[VEC_PHI2]]
2464; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
2465; INTERLEAVE-NEXT:    [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
2466; INTERLEAVE-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2467; INTERLEAVE:       middle.block:
2468; INTERLEAVE-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]]
2469; INTERLEAVE-NEXT:    [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
2470; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[SMAX]], [[N_VEC]]
2471; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2472; INTERLEAVE:       scalar.ph:
2473; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2474; INTERLEAVE-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP51]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2475; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
2476; INTERLEAVE:       for.body:
2477; INTERLEAVE-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[IF_END:%.*]] ]
2478; INTERLEAVE-NEXT:    [[SUM:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR4:%.*]], [[IF_END]] ]
2479; INTERLEAVE-NEXT:    [[TMP52:%.*]] = zext i32 [[I]] to i64
2480; INTERLEAVE-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP52]]
2481; INTERLEAVE-NEXT:    [[VAR1:%.*]] = load i32, i32* [[VAR0]], align 4
2482; INTERLEAVE-NEXT:    br i1 [[C]], label [[IF_THEN:%.*]], label [[IF_END]]
2483; INTERLEAVE:       if.then:
2484; INTERLEAVE-NEXT:    [[VAR2:%.*]] = udiv i32 [[VAR1]], [[I]]
2485; INTERLEAVE-NEXT:    br label [[IF_END]]
2486; INTERLEAVE:       if.end:
2487; INTERLEAVE-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR2]], [[IF_THEN]] ], [ [[VAR1]], [[FOR_BODY]] ]
2488; INTERLEAVE-NEXT:    [[VAR4]] = add i32 [[VAR3]], [[SUM]]
2489; INTERLEAVE-NEXT:    [[I_NEXT]] = add nuw nsw i32 [[I]], 1
2490; INTERLEAVE-NEXT:    [[COND:%.*]] = icmp slt i32 [[I_NEXT]], [[N]]
2491; INTERLEAVE-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP25:![0-9]+]]
2492; INTERLEAVE:       for.end:
2493; INTERLEAVE-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR4]], [[IF_END]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
2494; INTERLEAVE-NEXT:    ret i32 [[VAR5]]
2495;
2496entry:
2497  br label %for.body
2498
2499for.body:
2500  %i = phi i32 [ 0, %entry ], [ %i.next, %if.end ]
2501  %sum = phi i32 [ 0, %entry ], [ %var4, %if.end ]
2502  %var0 = getelementptr inbounds i32, i32* %a, i32 %i
2503  %var1 = load i32, i32* %var0, align 4
2504  br i1 %c, label %if.then, label %if.end
2505
2506if.then:
2507  %var2 = udiv i32 %var1, %i
2508  br label %if.end
2509
2510if.end:
2511  %var3 = phi i32 [ %var2, %if.then ], [ %var1, %for.body ]
2512  %var4 = add i32 %var3, %sum
2513  %i.next = add nuw nsw i32 %i, 1
2514  %cond = icmp slt i32 %i.next, %n
2515  br i1 %cond, label %for.body, label %for.end
2516
2517for.end:
2518  %var5  = phi i32 [ %var4, %if.end ]
2519  ret i32 %var5
2520}
2521
2522; Ensure we generate both a vector and a scalar induction variable. In this
2523; test, the induction variable is used by an instruction that will be
2524; vectorized (trunc) as well as an instruction that will remain in scalar form
2525; (getelementptr).
2526;
2527;
2528;
2529
2530%pair.i16 = type { i16, i16 } ; struct of two i16 fields; the test below addresses field 1 via getelementptr and stores an i16 there
2531define void @iv_vector_and_scalar_users(%pair.i16* %p, i32 %a, i32 %n) {
2532; CHECK-LABEL: @iv_vector_and_scalar_users(
2533; CHECK-NEXT:  entry:
2534; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
2535; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
2536; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
2537; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
2538; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2539; CHECK:       vector.ph:
2540; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
2541; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
2542; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0
2543; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
2544; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2545; CHECK:       vector.body:
2546; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2547; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2548; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
2549; CHECK-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
2550; CHECK-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 1
2551; CHECK-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND1]]
2552; CHECK-NEXT:    [[TMP6:%.*]] = trunc <2 x i32> [[TMP5]] to <2 x i16>
2553; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[TMP3]], i32 1
2554; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP4]], i32 1
2555; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i16> [[TMP6]], i32 0
2556; CHECK-NEXT:    store i16 [[TMP9]], i16* [[TMP7]], align 2
2557; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x i16> [[TMP6]], i32 1
2558; CHECK-NEXT:    store i16 [[TMP10]], i16* [[TMP8]], align 2
2559; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2560; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
2561; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i32> [[VEC_IND1]], <i32 2, i32 2>
2562; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2563; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
2564; CHECK:       middle.block:
2565; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
2566; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2567; CHECK:       scalar.ph:
2568; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2569; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
2570; CHECK:       for.body:
2571; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
2572; CHECK-NEXT:    [[TMP12:%.*]] = trunc i64 [[I]] to i32
2573; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[A]], [[TMP12]]
2574; CHECK-NEXT:    [[TMP14:%.*]] = trunc i32 [[TMP13]] to i16
2575; CHECK-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1
2576; CHECK-NEXT:    store i16 [[TMP14]], i16* [[TMP15]], align 2
2577; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
2578; CHECK-NEXT:    [[TMP16:%.*]] = trunc i64 [[I_NEXT]] to i32
2579; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP16]], [[N]]
2580; CHECK-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2581; CHECK:       for.end:
2582; CHECK-NEXT:    ret void
2583;
2584; IND-LABEL: @iv_vector_and_scalar_users(
2585; IND-NEXT:  entry:
2586; IND-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
2587; IND-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
2588; IND-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
2589; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
2590; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2591; IND:       vector.ph:
2592; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
2593; IND-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0
2594; IND-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
2595; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
2596; IND:       vector.body:
2597; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2598; IND-NEXT:    [[VEC_IND1:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
2599; IND-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 1
2600; IND-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND1]]
2601; IND-NEXT:    [[TMP5:%.*]] = trunc <2 x i32> [[TMP4]] to <2 x i16>
2602; IND-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[INDEX]], i32 1
2603; IND-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP3]], i32 1
2604; IND-NEXT:    [[TMP8:%.*]] = extractelement <2 x i16> [[TMP5]], i64 0
2605; IND-NEXT:    store i16 [[TMP8]], i16* [[TMP6]], align 2
2606; IND-NEXT:    [[TMP9:%.*]] = extractelement <2 x i16> [[TMP5]], i64 1
2607; IND-NEXT:    store i16 [[TMP9]], i16* [[TMP7]], align 2
2608; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2609; IND-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i32> [[VEC_IND1]], <i32 2, i32 2>
2610; IND-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2611; IND-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
2612; IND:       middle.block:
2613; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
2614; IND-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2615; IND:       scalar.ph:
2616; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2617; IND-NEXT:    br label [[FOR_BODY:%.*]]
2618; IND:       for.body:
2619; IND-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
2620; IND-NEXT:    [[TMP11:%.*]] = trunc i64 [[I]] to i32
2621; IND-NEXT:    [[TMP12:%.*]] = add i32 [[TMP11]], [[A]]
2622; IND-NEXT:    [[TMP13:%.*]] = trunc i32 [[TMP12]] to i16
2623; IND-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1
2624; IND-NEXT:    store i16 [[TMP13]], i16* [[TMP14]], align 2
2625; IND-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
2626; IND-NEXT:    [[TMP15:%.*]] = trunc i64 [[I_NEXT]] to i32
2627; IND-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP15]], [[N]]
2628; IND-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2629; IND:       for.end:
2630; IND-NEXT:    ret void
2631;
2632; UNROLL-LABEL: @iv_vector_and_scalar_users(
2633; UNROLL-NEXT:  entry:
2634; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
2635; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
2636; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
2637; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
2638; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2639; UNROLL:       vector.ph:
2640; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
2641; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i64 0
2642; UNROLL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
2643; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i64 0
2644; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
2645; UNROLL:       vector.body:
2646; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2647; UNROLL-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
2648; UNROLL-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 1
2649; UNROLL-NEXT:    [[TMP4:%.*]] = or i64 [[INDEX]], 2
2650; UNROLL-NEXT:    [[TMP5:%.*]] = or i64 [[INDEX]], 3
2651; UNROLL-NEXT:    [[TMP6:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND2]]
2652; UNROLL-NEXT:    [[TMP7:%.*]] = add <2 x i32> [[BROADCAST_SPLATINSERT6]], <i32 2, i32 poison>
2653; UNROLL-NEXT:    [[TMP8:%.*]] = shufflevector <2 x i32> [[TMP7]], <2 x i32> poison, <2 x i32> zeroinitializer
2654; UNROLL-NEXT:    [[TMP9:%.*]] = add <2 x i32> [[TMP8]], [[VEC_IND2]]
2655; UNROLL-NEXT:    [[TMP10:%.*]] = trunc <2 x i32> [[TMP6]] to <2 x i16>
2656; UNROLL-NEXT:    [[TMP11:%.*]] = trunc <2 x i32> [[TMP9]] to <2 x i16>
2657; UNROLL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[INDEX]], i32 1
2658; UNROLL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP3]], i32 1
2659; UNROLL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP4]], i32 1
2660; UNROLL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP5]], i32 1
2661; UNROLL-NEXT:    [[TMP16:%.*]] = extractelement <2 x i16> [[TMP10]], i64 0
2662; UNROLL-NEXT:    store i16 [[TMP16]], i16* [[TMP12]], align 2
2663; UNROLL-NEXT:    [[TMP17:%.*]] = extractelement <2 x i16> [[TMP10]], i64 1
2664; UNROLL-NEXT:    store i16 [[TMP17]], i16* [[TMP13]], align 2
2665; UNROLL-NEXT:    [[TMP18:%.*]] = extractelement <2 x i16> [[TMP11]], i64 0
2666; UNROLL-NEXT:    store i16 [[TMP18]], i16* [[TMP14]], align 2
2667; UNROLL-NEXT:    [[TMP19:%.*]] = extractelement <2 x i16> [[TMP11]], i64 1
2668; UNROLL-NEXT:    store i16 [[TMP19]], i16* [[TMP15]], align 2
2669; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2670; UNROLL-NEXT:    [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND2]], <i32 4, i32 4>
2671; UNROLL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2672; UNROLL-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
2673; UNROLL:       middle.block:
2674; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
2675; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2676; UNROLL:       scalar.ph:
2677; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2678; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
2679; UNROLL:       for.body:
2680; UNROLL-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
2681; UNROLL-NEXT:    [[TMP21:%.*]] = trunc i64 [[I]] to i32
2682; UNROLL-NEXT:    [[TMP22:%.*]] = add i32 [[TMP21]], [[A]]
2683; UNROLL-NEXT:    [[TMP23:%.*]] = trunc i32 [[TMP22]] to i16
2684; UNROLL-NEXT:    [[TMP24:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1
2685; UNROLL-NEXT:    store i16 [[TMP23]], i16* [[TMP24]], align 2
2686; UNROLL-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
2687; UNROLL-NEXT:    [[TMP25:%.*]] = trunc i64 [[I_NEXT]] to i32
2688; UNROLL-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP25]], [[N]]
2689; UNROLL-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2690; UNROLL:       for.end:
2691; UNROLL-NEXT:    ret void
2692;
2693; UNROLL-NO-IC-LABEL: @iv_vector_and_scalar_users(
2694; UNROLL-NO-IC-NEXT:  entry:
2695; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
2696; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
2697; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
2698; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
2699; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2700; UNROLL-NO-IC:       vector.ph:
2701; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
2702; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
2703; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[A:%.*]], i32 0
2704; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
2705; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <2 x i32> poison, i32 [[A]], i32 0
2706; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT7:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT6]], <2 x i32> poison, <2 x i32> zeroinitializer
2707; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
2708; UNROLL-NO-IC:       vector.body:
2709; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2710; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2711; UNROLL-NO-IC-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
2712; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
2713; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
2714; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 1
2715; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 2
2716; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 3
2717; UNROLL-NO-IC-NEXT:    [[STEP_ADD3:%.*]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2>
2718; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add <2 x i32> [[BROADCAST_SPLAT]], [[VEC_IND2]]
2719; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add <2 x i32> [[BROADCAST_SPLAT7]], [[STEP_ADD3]]
2720; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = trunc <2 x i32> [[TMP7]] to <2 x i16>
2721; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = trunc <2 x i32> [[TMP8]] to <2 x i16>
2722; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[TMP3]], i32 1
2723; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP4]], i32 1
2724; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP5]], i32 1
2725; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP6]], i32 1
2726; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = extractelement <2 x i16> [[TMP9]], i32 0
2727; UNROLL-NO-IC-NEXT:    store i16 [[TMP15]], i16* [[TMP11]], align 2
2728; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = extractelement <2 x i16> [[TMP9]], i32 1
2729; UNROLL-NO-IC-NEXT:    store i16 [[TMP16]], i16* [[TMP12]], align 2
2730; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = extractelement <2 x i16> [[TMP10]], i32 0
2731; UNROLL-NO-IC-NEXT:    store i16 [[TMP17]], i16* [[TMP13]], align 2
2732; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = extractelement <2 x i16> [[TMP10]], i32 1
2733; UNROLL-NO-IC-NEXT:    store i16 [[TMP18]], i16* [[TMP14]], align 2
2734; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2735; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
2736; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT5]] = add <2 x i32> [[STEP_ADD3]], <i32 2, i32 2>
2737; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2738; UNROLL-NO-IC-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
2739; UNROLL-NO-IC:       middle.block:
2740; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
2741; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2742; UNROLL-NO-IC:       scalar.ph:
2743; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2744; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
2745; UNROLL-NO-IC:       for.body:
2746; UNROLL-NO-IC-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
2747; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = trunc i64 [[I]] to i32
2748; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = add i32 [[A]], [[TMP20]]
2749; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = trunc i32 [[TMP21]] to i16
2750; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1
2751; UNROLL-NO-IC-NEXT:    store i16 [[TMP22]], i16* [[TMP23]], align 2
2752; UNROLL-NO-IC-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
2753; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = trunc i64 [[I_NEXT]] to i32
2754; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP24]], [[N]]
2755; UNROLL-NO-IC-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2756; UNROLL-NO-IC:       for.end:
2757; UNROLL-NO-IC-NEXT:    ret void
2758;
2759; INTERLEAVE-LABEL: @iv_vector_and_scalar_users(
2760; INTERLEAVE-NEXT:  entry:
2761; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
2762; INTERLEAVE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
2763; INTERLEAVE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
2764; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
2765; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2766; INTERLEAVE:       vector.ph:
2767; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
2768; INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[A:%.*]], i64 0
2769; INTERLEAVE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
2770; INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT6:%.*]] = insertelement <4 x i32> poison, i32 [[A]], i64 0
2771; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
2772; INTERLEAVE:       vector.body:
2773; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2774; INTERLEAVE-NEXT:    [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
2775; INTERLEAVE-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 1
2776; INTERLEAVE-NEXT:    [[TMP4:%.*]] = or i64 [[INDEX]], 2
2777; INTERLEAVE-NEXT:    [[TMP5:%.*]] = or i64 [[INDEX]], 3
2778; INTERLEAVE-NEXT:    [[TMP6:%.*]] = or i64 [[INDEX]], 4
2779; INTERLEAVE-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 5
2780; INTERLEAVE-NEXT:    [[TMP8:%.*]] = or i64 [[INDEX]], 6
2781; INTERLEAVE-NEXT:    [[TMP9:%.*]] = or i64 [[INDEX]], 7
2782; INTERLEAVE-NEXT:    [[TMP10:%.*]] = add <4 x i32> [[BROADCAST_SPLAT]], [[VEC_IND2]]
2783; INTERLEAVE-NEXT:    [[TMP11:%.*]] = add <4 x i32> [[BROADCAST_SPLATINSERT6]], <i32 4, i32 poison, i32 poison, i32 poison>
2784; INTERLEAVE-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i32> [[TMP11]], <4 x i32> poison, <4 x i32> zeroinitializer
2785; INTERLEAVE-NEXT:    [[TMP13:%.*]] = add <4 x i32> [[TMP12]], [[VEC_IND2]]
2786; INTERLEAVE-NEXT:    [[TMP14:%.*]] = trunc <4 x i32> [[TMP10]] to <4 x i16>
2787; INTERLEAVE-NEXT:    [[TMP15:%.*]] = trunc <4 x i32> [[TMP13]] to <4 x i16>
2788; INTERLEAVE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [[PAIR_I16:%.*]], %pair.i16* [[P:%.*]], i64 [[INDEX]], i32 1
2789; INTERLEAVE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP3]], i32 1
2790; INTERLEAVE-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP4]], i32 1
2791; INTERLEAVE-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP5]], i32 1
2792; INTERLEAVE-NEXT:    [[TMP20:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP6]], i32 1
2793; INTERLEAVE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP7]], i32 1
2794; INTERLEAVE-NEXT:    [[TMP22:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP8]], i32 1
2795; INTERLEAVE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[TMP9]], i32 1
2796; INTERLEAVE-NEXT:    [[TMP24:%.*]] = extractelement <4 x i16> [[TMP14]], i64 0
2797; INTERLEAVE-NEXT:    store i16 [[TMP24]], i16* [[TMP16]], align 2
2798; INTERLEAVE-NEXT:    [[TMP25:%.*]] = extractelement <4 x i16> [[TMP14]], i64 1
2799; INTERLEAVE-NEXT:    store i16 [[TMP25]], i16* [[TMP17]], align 2
2800; INTERLEAVE-NEXT:    [[TMP26:%.*]] = extractelement <4 x i16> [[TMP14]], i64 2
2801; INTERLEAVE-NEXT:    store i16 [[TMP26]], i16* [[TMP18]], align 2
2802; INTERLEAVE-NEXT:    [[TMP27:%.*]] = extractelement <4 x i16> [[TMP14]], i64 3
2803; INTERLEAVE-NEXT:    store i16 [[TMP27]], i16* [[TMP19]], align 2
2804; INTERLEAVE-NEXT:    [[TMP28:%.*]] = extractelement <4 x i16> [[TMP15]], i64 0
2805; INTERLEAVE-NEXT:    store i16 [[TMP28]], i16* [[TMP20]], align 2
2806; INTERLEAVE-NEXT:    [[TMP29:%.*]] = extractelement <4 x i16> [[TMP15]], i64 1
2807; INTERLEAVE-NEXT:    store i16 [[TMP29]], i16* [[TMP21]], align 2
2808; INTERLEAVE-NEXT:    [[TMP30:%.*]] = extractelement <4 x i16> [[TMP15]], i64 2
2809; INTERLEAVE-NEXT:    store i16 [[TMP30]], i16* [[TMP22]], align 2
2810; INTERLEAVE-NEXT:    [[TMP31:%.*]] = extractelement <4 x i16> [[TMP15]], i64 3
2811; INTERLEAVE-NEXT:    store i16 [[TMP31]], i16* [[TMP23]], align 2
2812; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2813; INTERLEAVE-NEXT:    [[VEC_IND_NEXT5]] = add <4 x i32> [[VEC_IND2]], <i32 8, i32 8, i32 8, i32 8>
2814; INTERLEAVE-NEXT:    [[TMP32:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2815; INTERLEAVE-NEXT:    br i1 [[TMP32]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP26:![0-9]+]]
2816; INTERLEAVE:       middle.block:
2817; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
2818; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2819; INTERLEAVE:       scalar.ph:
2820; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
2821; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
2822; INTERLEAVE:       for.body:
2823; INTERLEAVE-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
2824; INTERLEAVE-NEXT:    [[TMP33:%.*]] = trunc i64 [[I]] to i32
2825; INTERLEAVE-NEXT:    [[TMP34:%.*]] = add i32 [[TMP33]], [[A]]
2826; INTERLEAVE-NEXT:    [[TMP35:%.*]] = trunc i32 [[TMP34]] to i16
2827; INTERLEAVE-NEXT:    [[TMP36:%.*]] = getelementptr inbounds [[PAIR_I16]], %pair.i16* [[P]], i64 [[I]], i32 1
2828; INTERLEAVE-NEXT:    store i16 [[TMP35]], i16* [[TMP36]], align 2
2829; INTERLEAVE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
2830; INTERLEAVE-NEXT:    [[TMP37:%.*]] = trunc i64 [[I_NEXT]] to i32
2831; INTERLEAVE-NEXT:    [[COND:%.*]] = icmp eq i32 [[TMP37]], [[N]]
2832; INTERLEAVE-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP27:![0-9]+]]
2833; INTERLEAVE:       for.end:
2834; INTERLEAVE-NEXT:    ret void
2835;
2836entry:
2837  br label %for.body
2838
2839for.body:
2840  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
2841  %0 = trunc i64 %i to i32
2842  %1 = add i32 %a, %0
2843  %2 = trunc i32 %1 to i16
2844  %3 = getelementptr inbounds %pair.i16, %pair.i16* %p, i64 %i, i32 1
2845  store i16 %2, i16* %3, align 2
2846  %i.next = add nuw nsw i64 %i, 1
2847  %4 = trunc i64 %i.next to i32
2848  %cond = icmp eq i32 %4, %n
2849  br i1 %cond, label %for.end, label %for.body
2850
2851for.end:
2852  ret void
2853}
2854
2855; Make sure that the loop exit count computation does not overflow for i8 and
2856; i16. The exit count of these loops is i8/i16 max + 1. If we don't cast the
2857; induction variable to a bigger type the exit count computation will overflow
2858; to 0.
2859; PR17532
2860
2861define i32 @i8_loop() nounwind readnone ssp uwtable {
2862; CHECK-LABEL: @i8_loop(
2863; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2864; CHECK:       vector.ph:
2865; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2866; CHECK:       vector.body:
2867; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2868; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
2869; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[INDEX]] to i8
2870; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i8 0, [[TMP1]]
2871; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0
2872; CHECK-NEXT:    [[TMP3]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
2873; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
2874; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
2875; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2876; CHECK:       middle.block:
2877; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP3]])
2878; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 256, 256
2879; CHECK-NEXT:    br i1 [[CMP_N]], label [[TMP10:%.*]], label [[SCALAR_PH]]
2880; CHECK:       scalar.ph:
2881; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ]
2882; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
2883; CHECK-NEXT:    br label [[TMP6:%.*]]
2884; CHECK:       6:
2885; CHECK-NEXT:    [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP7:%.*]], [[TMP6]] ]
2886; CHECK-NEXT:    [[B_0:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP8:%.*]], [[TMP6]] ]
2887; CHECK-NEXT:    [[TMP7]] = and i32 [[A_0]], 4
2888; CHECK-NEXT:    [[TMP8]] = add i8 [[B_0]], -1
2889; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i8 [[TMP8]], 0
2890; CHECK-NEXT:    br i1 [[TMP9]], label [[TMP10]], label [[TMP6]], !llvm.loop [[LOOP29:![0-9]+]]
2891; CHECK:       10:
2892; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP7]], [[TMP6]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
2893; CHECK-NEXT:    ret i32 [[DOTLCSSA]]
2894;
2895; IND-LABEL: @i8_loop(
2896; IND-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2897; IND:       vector.ph:
2898; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
2899; IND:       vector.body:
2900; IND-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2901; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
2902; IND-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
2903; IND-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2904; IND:       middle.block:
2905; IND-NEXT:    br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]]
2906; IND:       scalar.ph:
2907; IND-NEXT:    br label [[TMP2:%.*]]
2908; IND:       2:
2909; IND-NEXT:    br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP29:![0-9]+]]
2910; IND:       3:
2911; IND-NEXT:    ret i32 0
2912;
2913; UNROLL-LABEL: @i8_loop(
2914; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2915; UNROLL:       vector.ph:
2916; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
2917; UNROLL:       vector.body:
2918; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2919; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2920; UNROLL-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
2921; UNROLL-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2922; UNROLL:       middle.block:
2923; UNROLL-NEXT:    br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]]
2924; UNROLL:       scalar.ph:
2925; UNROLL-NEXT:    br label [[TMP2:%.*]]
2926; UNROLL:       2:
2927; UNROLL-NEXT:    br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP29:![0-9]+]]
2928; UNROLL:       3:
2929; UNROLL-NEXT:    ret i32 0
2930;
2931; UNROLL-NO-IC-LABEL: @i8_loop(
2932; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2933; UNROLL-NO-IC:       vector.ph:
2934; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
2935; UNROLL-NO-IC:       vector.body:
2936; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2937; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
2938; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
2939; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = trunc i32 [[INDEX]] to i8
2940; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = sub i8 0, [[TMP1]]
2941; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i8 [[OFFSET_IDX]], 0
2942; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i8 [[OFFSET_IDX]], -2
2943; UNROLL-NO-IC-NEXT:    [[TMP4]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
2944; UNROLL-NO-IC-NEXT:    [[TMP5]] = and <2 x i32> [[VEC_PHI1]], <i32 4, i32 4>
2945; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2946; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
2947; UNROLL-NO-IC-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2948; UNROLL-NO-IC:       middle.block:
2949; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = and <2 x i32> [[TMP5]], [[TMP4]]
2950; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]])
2951; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 256, 256
2952; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[TMP12:%.*]], label [[SCALAR_PH]]
2953; UNROLL-NO-IC:       scalar.ph:
2954; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ]
2955; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
2956; UNROLL-NO-IC-NEXT:    br label [[TMP8:%.*]]
2957; UNROLL-NO-IC:       8:
2958; UNROLL-NO-IC-NEXT:    [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP9:%.*]], [[TMP8]] ]
2959; UNROLL-NO-IC-NEXT:    [[B_0:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP10:%.*]], [[TMP8]] ]
2960; UNROLL-NO-IC-NEXT:    [[TMP9]] = and i32 [[A_0]], 4
2961; UNROLL-NO-IC-NEXT:    [[TMP10]] = add i8 [[B_0]], -1
2962; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = icmp eq i8 [[TMP10]], 0
2963; UNROLL-NO-IC-NEXT:    br i1 [[TMP11]], label [[TMP12]], label [[TMP8]], !llvm.loop [[LOOP29:![0-9]+]]
2964; UNROLL-NO-IC:       12:
2965; UNROLL-NO-IC-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP9]], [[TMP8]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
2966; UNROLL-NO-IC-NEXT:    ret i32 [[DOTLCSSA]]
2967;
2968; INTERLEAVE-LABEL: @i8_loop(
2969; INTERLEAVE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2970; INTERLEAVE:       vector.ph:
2971; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
2972; INTERLEAVE:       vector.body:
2973; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2974; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
2975; INTERLEAVE-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 256
2976; INTERLEAVE-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP28:![0-9]+]]
2977; INTERLEAVE:       middle.block:
2978; INTERLEAVE-NEXT:    br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]]
2979; INTERLEAVE:       scalar.ph:
2980; INTERLEAVE-NEXT:    br label [[TMP2:%.*]]
2981; INTERLEAVE:       2:
2982; INTERLEAVE-NEXT:    br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP29:![0-9]+]]
2983; INTERLEAVE:       3:
2984; INTERLEAVE-NEXT:    ret i32 0
2985;
; Scalar input for the test. The expected-output lines above are autogenerated
; (see the note at the top of the file); regenerate them instead of editing by
; hand. In every configuration they expect the vector loop to count with an i32
; index and to exit when that index reaches 256.
;
; The implicit entry block (%0) jumps straight into the single-block loop %1.
2986  br label %1
2987
2988; <label>:1                                       ; preds = %1, %0
; %a.0 is the running value: it starts at 1 and is and-ed with 4 every iteration.
2989  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
; %b.0 is the i8 counter: it starts at 0 and is decremented by 1 per iteration.
2990  %b.0 = phi i8 [ 0, %0 ], [ %3, %1 ]
2991  %2 = and i32 %a.0, 4
2992  %3 = add i8 %b.0, -1
; Exit once the decremented counter is 0 again. Starting from 0, that takes 256
; decrements (0 -> 255 -> ... -> 1 -> 0), a count that does not fit in i8.
2993  %4 = icmp eq i8 %3, 0
2994  br i1 %4, label %5, label %1
2995
2996; <label>:5                                       ; preds = %1
; Return the and-result computed in the final iteration.
2997  ret i32 %2
2998}
2999
3000
3001define i32 @i16_loop() nounwind readnone ssp uwtable {
3002; CHECK-LABEL: @i16_loop(
3003; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3004; CHECK:       vector.ph:
3005; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
3006; CHECK:       vector.body:
3007; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3008; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
3009; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16
3010; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i16 0, [[TMP1]]
3011; CHECK-NEXT:    [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0
3012; CHECK-NEXT:    [[TMP3]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
3013; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3014; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
3015; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
3016; CHECK:       middle.block:
3017; CHECK-NEXT:    [[TMP5:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP3]])
3018; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 65536, 65536
3019; CHECK-NEXT:    br i1 [[CMP_N]], label [[TMP10:%.*]], label [[SCALAR_PH]]
3020; CHECK:       scalar.ph:
3021; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ]
3022; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
3023; CHECK-NEXT:    br label [[TMP6:%.*]]
3024; CHECK:       6:
3025; CHECK-NEXT:    [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP7:%.*]], [[TMP6]] ]
3026; CHECK-NEXT:    [[B_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP8:%.*]], [[TMP6]] ]
3027; CHECK-NEXT:    [[TMP7]] = and i32 [[A_0]], 4
3028; CHECK-NEXT:    [[TMP8]] = add i16 [[B_0]], -1
3029; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i16 [[TMP8]], 0
3030; CHECK-NEXT:    br i1 [[TMP9]], label [[TMP10]], label [[TMP6]], !llvm.loop [[LOOP31:![0-9]+]]
3031; CHECK:       10:
3032; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP7]], [[TMP6]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
3033; CHECK-NEXT:    ret i32 [[DOTLCSSA]]
3034;
3035; IND-LABEL: @i16_loop(
3036; IND-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3037; IND:       vector.ph:
3038; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
3039; IND:       vector.body:
3040; IND-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3041; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3042; IND-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
3043; IND-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
3044; IND:       middle.block:
3045; IND-NEXT:    br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]]
3046; IND:       scalar.ph:
3047; IND-NEXT:    br label [[TMP2:%.*]]
3048; IND:       2:
3049; IND-NEXT:    br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP31:![0-9]+]]
3050; IND:       3:
3051; IND-NEXT:    ret i32 0
3052;
3053; UNROLL-LABEL: @i16_loop(
3054; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3055; UNROLL:       vector.ph:
3056; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
3057; UNROLL:       vector.body:
3058; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3059; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3060; UNROLL-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
3061; UNROLL-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
3062; UNROLL:       middle.block:
3063; UNROLL-NEXT:    br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]]
3064; UNROLL:       scalar.ph:
3065; UNROLL-NEXT:    br label [[TMP2:%.*]]
3066; UNROLL:       2:
3067; UNROLL-NEXT:    br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP31:![0-9]+]]
3068; UNROLL:       3:
3069; UNROLL-NEXT:    ret i32 0
3070;
3071; UNROLL-NO-IC-LABEL: @i16_loop(
3072; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3073; UNROLL-NO-IC:       vector.ph:
3074; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
3075; UNROLL-NO-IC:       vector.body:
3076; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3077; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
3078; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
3079; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16
3080; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = sub i16 0, [[TMP1]]
3081; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i16 [[OFFSET_IDX]], 0
3082; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i16 [[OFFSET_IDX]], -2
3083; UNROLL-NO-IC-NEXT:    [[TMP4]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
3084; UNROLL-NO-IC-NEXT:    [[TMP5]] = and <2 x i32> [[VEC_PHI1]], <i32 4, i32 4>
3085; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3086; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
3087; UNROLL-NO-IC-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
3088; UNROLL-NO-IC:       middle.block:
3089; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = and <2 x i32> [[TMP5]], [[TMP4]]
3090; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]])
3091; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 65536, 65536
3092; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[TMP12:%.*]], label [[SCALAR_PH]]
3093; UNROLL-NO-IC:       scalar.ph:
3094; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ]
3095; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
3096; UNROLL-NO-IC-NEXT:    br label [[TMP8:%.*]]
3097; UNROLL-NO-IC:       8:
3098; UNROLL-NO-IC-NEXT:    [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP9:%.*]], [[TMP8]] ]
3099; UNROLL-NO-IC-NEXT:    [[B_0:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP10:%.*]], [[TMP8]] ]
3100; UNROLL-NO-IC-NEXT:    [[TMP9]] = and i32 [[A_0]], 4
3101; UNROLL-NO-IC-NEXT:    [[TMP10]] = add i16 [[B_0]], -1
3102; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = icmp eq i16 [[TMP10]], 0
3103; UNROLL-NO-IC-NEXT:    br i1 [[TMP11]], label [[TMP12]], label [[TMP8]], !llvm.loop [[LOOP31:![0-9]+]]
3104; UNROLL-NO-IC:       12:
3105; UNROLL-NO-IC-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP9]], [[TMP8]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
3106; UNROLL-NO-IC-NEXT:    ret i32 [[DOTLCSSA]]
3107;
3108; INTERLEAVE-LABEL: @i16_loop(
3109; INTERLEAVE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3110; INTERLEAVE:       vector.ph:
3111; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
3112; INTERLEAVE:       vector.body:
3113; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3114; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
3115; INTERLEAVE-NEXT:    [[TMP1:%.*]] = icmp eq i32 [[INDEX_NEXT]], 65536
3116; INTERLEAVE-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
3117; INTERLEAVE:       middle.block:
3118; INTERLEAVE-NEXT:    br i1 true, label [[TMP3:%.*]], label [[SCALAR_PH]]
3119; INTERLEAVE:       scalar.ph:
3120; INTERLEAVE-NEXT:    br label [[TMP2:%.*]]
3121; INTERLEAVE:       2:
3122; INTERLEAVE-NEXT:    br i1 undef, label [[TMP3]], label [[TMP2]], !llvm.loop [[LOOP31:![0-9]+]]
3123; INTERLEAVE:       3:
3124; INTERLEAVE-NEXT:    ret i32 0
3125;
; Scalar input for the test. The expected-output lines above are autogenerated
; (see the note at the top of the file); regenerate them instead of editing by
; hand. In every configuration they expect the vector loop to count with an i32
; index and to exit when that index reaches 65536.
;
; The implicit entry block (%0) jumps straight into the single-block loop %1.
3126  br label %1
3127
3128; <label>:1                                       ; preds = %1, %0
; %a.0 is the running value: it starts at 1 and is and-ed with 4 every iteration.
3129  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
; %b.0 is the i16 counter: it starts at 0 and is decremented by 1 per iteration.
3130  %b.0 = phi i16 [ 0, %0 ], [ %3, %1 ]
3131  %2 = and i32 %a.0, 4
3132  %3 = add i16 %b.0, -1
; Exit once the decremented counter is 0 again. Starting from 0, that takes
; 65536 decrements (0 -> 65535 -> ... -> 1 -> 0), a count that does not fit in
; i16.
3133  %4 = icmp eq i16 %3, 0
3134  br i1 %4, label %5, label %1
3135
3136; <label>:5                                       ; preds = %1
; Return the and-result computed in the final iteration.
3137  ret i32 %2
3138}
3139
3140; This loop has a backedge taken count of i32_max. We need to check for this
3141; condition and branch directly to the scalar loop.
3142
3143
3144
; Test function (no arguments) containing a single-block loop with two phis:
;   %b.0 starts at 0 and has -1 added on every iteration; the loop only exits
;   when the decremented value compares equal to 0, so the i32 counter has to
;   wrap all the way around before the exit condition holds.
;   %a.0 starts at 1 and is AND-ed with 4 on every iteration; the last AND
;   result is the return value.
; Every expectation block below starts with an unconditional `br i1 true` to
; the scalar preheader, i.e. the generated vector loop is bypassed.
3145define i32 @max_i32_backedgetaken() nounwind readnone ssp uwtable {
3146; CHECK-LABEL: @max_i32_backedgetaken(
3147; CHECK-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3148; CHECK:       vector.ph:
3149; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
3150; CHECK:       vector.body:
3151; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3152; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP2:%.*]], [[VECTOR_BODY]] ]
3153; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 0, [[INDEX]]
3154; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
3155; CHECK-NEXT:    [[TMP2]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
3156; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3157; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
3158; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
3159; CHECK:       middle.block:
3160; CHECK-NEXT:    [[TMP4:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP2]])
3161; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 0, 0
3162; CHECK-NEXT:    br i1 [[CMP_N]], label [[TMP9:%.*]], label [[SCALAR_PH]]
3163; CHECK:       scalar.ph:
3164; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ]
3165; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
3166; CHECK-NEXT:    br label [[TMP5:%.*]]
3167; CHECK:       5:
3168; CHECK-NEXT:    [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP6:%.*]], [[TMP5]] ]
3169; CHECK-NEXT:    [[B_0:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP7:%.*]], [[TMP5]] ]
3170; CHECK-NEXT:    [[TMP6]] = and i32 [[A_0]], 4
3171; CHECK-NEXT:    [[TMP7]] = add i32 [[B_0]], -1
3172; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[TMP7]], 0
3173; CHECK-NEXT:    br i1 [[TMP8]], label [[TMP9]], label [[TMP5]], !llvm.loop [[LOOP33:![0-9]+]]
3174; CHECK:       9:
3175; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP6]], [[TMP5]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
3176; CHECK-NEXT:    ret i32 [[DOTLCSSA]]
3177;
3178; IND-LABEL: @max_i32_backedgetaken(
3179; IND-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3180; IND:       vector.ph:
3181; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
3182; IND:       vector.body:
3183; IND-NEXT:    br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
3184; IND:       middle.block:
3185; IND-NEXT:    br i1 undef, label [[TMP4:%.*]], label [[SCALAR_PH]]
3186; IND:       scalar.ph:
3187; IND-NEXT:    br label [[TMP1:%.*]]
3188; IND:       1:
3189; IND-NEXT:    [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TMP2:%.*]], [[TMP1]] ]
3190; IND-NEXT:    [[TMP2]] = add i32 [[B_0]], -1
3191; IND-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
3192; IND-NEXT:    br i1 [[TMP3]], label [[TMP4]], label [[TMP1]], !llvm.loop [[LOOP33:![0-9]+]]
3193; IND:       4:
3194; IND-NEXT:    ret i32 0
3195;
3196; UNROLL-LABEL: @max_i32_backedgetaken(
3197; UNROLL-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3198; UNROLL:       vector.ph:
3199; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
3200; UNROLL:       vector.body:
3201; UNROLL-NEXT:    br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
3202; UNROLL:       middle.block:
3203; UNROLL-NEXT:    br i1 undef, label [[TMP4:%.*]], label [[SCALAR_PH]]
3204; UNROLL:       scalar.ph:
3205; UNROLL-NEXT:    br label [[TMP1:%.*]]
3206; UNROLL:       1:
3207; UNROLL-NEXT:    [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TMP2:%.*]], [[TMP1]] ]
3208; UNROLL-NEXT:    [[TMP2]] = add i32 [[B_0]], -1
3209; UNROLL-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
3210; UNROLL-NEXT:    br i1 [[TMP3]], label [[TMP4]], label [[TMP1]], !llvm.loop [[LOOP33:![0-9]+]]
3211; UNROLL:       4:
3212; UNROLL-NEXT:    ret i32 0
3213;
3214; UNROLL-NO-IC-LABEL: @max_i32_backedgetaken(
3215; UNROLL-NO-IC-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3216; UNROLL-NO-IC:       vector.ph:
3217; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
3218; UNROLL-NO-IC:       vector.body:
3219; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3220; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ <i32 1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
3221; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
3222; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 0, [[INDEX]]
3223; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
3224; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], -2
3225; UNROLL-NO-IC-NEXT:    [[TMP3]] = and <2 x i32> [[VEC_PHI]], <i32 4, i32 4>
3226; UNROLL-NO-IC-NEXT:    [[TMP4]] = and <2 x i32> [[VEC_PHI1]], <i32 4, i32 4>
3227; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3228; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 0
3229; UNROLL-NO-IC-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
3230; UNROLL-NO-IC:       middle.block:
3231; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = and <2 x i32> [[TMP4]], [[TMP3]]
3232; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]])
3233; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 0, 0
3234; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[TMP11:%.*]], label [[SCALAR_PH]]
3235; UNROLL-NO-IC:       scalar.ph:
3236; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ 0, [[TMP0:%.*]] ]
3237; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 1, [[TMP0]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
3238; UNROLL-NO-IC-NEXT:    br label [[TMP7:%.*]]
3239; UNROLL-NO-IC:       7:
3240; UNROLL-NO-IC-NEXT:    [[A_0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP8:%.*]], [[TMP7]] ]
3241; UNROLL-NO-IC-NEXT:    [[B_0:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[TMP9:%.*]], [[TMP7]] ]
3242; UNROLL-NO-IC-NEXT:    [[TMP8]] = and i32 [[A_0]], 4
3243; UNROLL-NO-IC-NEXT:    [[TMP9]] = add i32 [[B_0]], -1
3244; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[TMP9]], 0
3245; UNROLL-NO-IC-NEXT:    br i1 [[TMP10]], label [[TMP11]], label [[TMP7]], !llvm.loop [[LOOP33:![0-9]+]]
3246; UNROLL-NO-IC:       11:
3247; UNROLL-NO-IC-NEXT:    [[DOTLCSSA:%.*]] = phi i32 [ [[TMP8]], [[TMP7]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
3248; UNROLL-NO-IC-NEXT:    ret i32 [[DOTLCSSA]]
3249;
3250; INTERLEAVE-LABEL: @max_i32_backedgetaken(
3251; INTERLEAVE-NEXT:    br i1 true, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3252; INTERLEAVE:       vector.ph:
3253; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
3254; INTERLEAVE:       vector.body:
3255; INTERLEAVE-NEXT:    br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
3256; INTERLEAVE:       middle.block:
3257; INTERLEAVE-NEXT:    br i1 undef, label [[TMP4:%.*]], label [[SCALAR_PH]]
3258; INTERLEAVE:       scalar.ph:
3259; INTERLEAVE-NEXT:    br label [[TMP1:%.*]]
3260; INTERLEAVE:       1:
3261; INTERLEAVE-NEXT:    [[B_0:%.*]] = phi i32 [ 0, [[SCALAR_PH]] ], [ [[TMP2:%.*]], [[TMP1]] ]
3262; INTERLEAVE-NEXT:    [[TMP2]] = add i32 [[B_0]], -1
3263; INTERLEAVE-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[TMP2]], 0
3264; INTERLEAVE-NEXT:    br i1 [[TMP3]], label [[TMP4]], label [[TMP1]], !llvm.loop [[LOOP33:![0-9]+]]
3265; INTERLEAVE:       4:
3266; INTERLEAVE-NEXT:    ret i32 0
3267;
; Input IR starts here. The implicit entry block (%0) jumps straight into the loop.
3268  br label %1
3269
3270; <label>:1                                       ; preds = %1, %0
; AND accumulator: 1 on entry from %0, otherwise the previous iteration's %2.
3271  %a.0 = phi i32 [ 1, %0 ], [ %2, %1 ]
; Down-counter: 0 on entry from %0, otherwise the previous iteration's %3.
3272  %b.0 = phi i32 [ 0, %0 ], [ %3, %1 ]
3273  %2 = and i32 %a.0, 4
3274  %3 = add i32 %b.0, -1
; Leave the loop only once the decremented counter is back at 0.
3275  %4 = icmp eq i32 %3, 0
3276  br i1 %4, label %5, label %1
3277
3278; <label>:5                                       ; preds = %1
; Return the AND result computed in the final iteration.
3279  ret i32 %2
3280}
3281
3282; When generating the overflow check we must make sure that the induction start
3283; value is defined before the branch to the scalar preheader.
3284
3285
3286
; Globals read by @testoverflowcheck:
;   @e - i8 initialised to 1; its loaded value seeds the i8 induction variable.
;   @d - i32 initialised to 0; its loaded value is the operand AND-ed in on
;        every loop iteration.
;   @c - i32 initialised to 0; its loaded value seeds the AND reduction.
3287@e = global i8 1, align 1
3288@d = common global i32 0, align 4
3289@c = common global i32 0, align 4
; Test function (no arguments) whose induction start value is loaded at run time.
;   entry loads three globals: the i8 value of @e (%.pr.i), the i32 value of
;   @d (%0) and the i32 value of @c (%c.promoted.i).
;   cond.end.i is a single-block loop: %inc4.i starts at the value loaded from
;   @e and is incremented by 1 per iteration, and the loop exits once the
;   incremented i8 compares equal to 0. %and3.i starts at the value loaded from
;   @c and is AND-ed with the value loaded from @d on every iteration.
;   loopexit returns the AND result of the final iteration (%and.i).
; In each expectation block below the three loads appear in the entry block,
; ahead of the min-iterations branch that picks between scalar.ph and vector.ph.
3290define i32 @testoverflowcheck() {
3291; CHECK-LABEL: @testoverflowcheck(
3292; CHECK-NEXT:  entry:
3293; CHECK-NEXT:    [[DOTPR_I:%.*]] = load i8, i8* @e, align 1
3294; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* @d, align 4
3295; CHECK-NEXT:    [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4
3296; CHECK-NEXT:    [[TMP1:%.*]] = sub i8 -1, [[DOTPR_I]]
3297; CHECK-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
3298; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
3299; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 2
3300; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3301; CHECK:       vector.ph:
3302; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP3]], 2
3303; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]]
3304; CHECK-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
3305; CHECK-NEXT:    [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_CRD]]
3306; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> <i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i32 0
3307; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
3308; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
3309; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
3310; CHECK:       vector.body:
3311; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3312; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
3313; CHECK-NEXT:    [[TMP5:%.*]] = trunc i32 [[INDEX]] to i8
3314; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[DOTPR_I]], [[TMP5]]
3315; CHECK-NEXT:    [[TMP6:%.*]] = add i8 [[OFFSET_IDX]], 0
3316; CHECK-NEXT:    [[TMP7]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI]]
3317; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3318; CHECK-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3319; CHECK-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
3320; CHECK:       middle.block:
3321; CHECK-NEXT:    [[TMP9:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP7]])
3322; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]]
3323; CHECK-NEXT:    br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
3324; CHECK:       scalar.ph:
3325; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ]
3326; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[C_PROMOTED_I]], [[ENTRY]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
3327; CHECK-NEXT:    br label [[COND_END_I:%.*]]
3328; CHECK:       cond.end.i:
3329; CHECK-NEXT:    [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ]
3330; CHECK-NEXT:    [[AND3_I:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[AND_I:%.*]], [[COND_END_I]] ]
3331; CHECK-NEXT:    [[AND_I]] = and i32 [[TMP0]], [[AND3_I]]
3332; CHECK-NEXT:    [[INC_I]] = add i8 [[INC4_I]], 1
3333; CHECK-NEXT:    [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
3334; CHECK-NEXT:    br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
3335; CHECK:       loopexit:
3336; CHECK-NEXT:    [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP9]], [[MIDDLE_BLOCK]] ]
3337; CHECK-NEXT:    ret i32 [[AND_I_LCSSA]]
3338;
3339; IND-LABEL: @testoverflowcheck(
3340; IND-NEXT:  entry:
3341; IND-NEXT:    [[DOTPR_I:%.*]] = load i8, i8* @e, align 1
3342; IND-NEXT:    [[TMP0:%.*]] = load i32, i32* @d, align 4
3343; IND-NEXT:    [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4
3344; IND-NEXT:    [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1
3345; IND-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
3346; IND-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
3347; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i8 [[DOTPR_I]], -1
3348; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3349; IND:       vector.ph:
3350; IND-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP3]], 510
3351; IND-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
3352; IND-NEXT:    [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_CRD]]
3353; IND-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
3354; IND-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
3355; IND-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
3356; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
3357; IND:       vector.body:
3358; IND-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3359; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3360; IND-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3361; IND-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
3362; IND:       middle.block:
3363; IND-NEXT:    [[TMP6:%.*]] = and <2 x i32> [[TMP4]], [[BROADCAST_SPLAT]]
3364; IND-NEXT:    [[TMP7:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[TMP6]])
3365; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]]
3366; IND-NEXT:    br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
3367; IND:       scalar.ph:
3368; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ]
3369; IND-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP7]], [[MIDDLE_BLOCK]] ], [ [[C_PROMOTED_I]], [[ENTRY]] ]
3370; IND-NEXT:    br label [[COND_END_I:%.*]]
3371; IND:       cond.end.i:
3372; IND-NEXT:    [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ]
3373; IND-NEXT:    [[TMP8:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]]
3374; IND-NEXT:    [[INC_I]] = add i8 [[INC4_I]], 1
3375; IND-NEXT:    [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
3376; IND-NEXT:    br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
3377; IND:       loopexit:
3378; IND-NEXT:    [[AND_I_LCSSA:%.*]] = phi i32 [ [[TMP8]], [[COND_END_I]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
3379; IND-NEXT:    ret i32 [[AND_I_LCSSA]]
3380;
3381; UNROLL-LABEL: @testoverflowcheck(
3382; UNROLL-NEXT:  entry:
3383; UNROLL-NEXT:    [[DOTPR_I:%.*]] = load i8, i8* @e, align 1
3384; UNROLL-NEXT:    [[TMP0:%.*]] = load i32, i32* @d, align 4
3385; UNROLL-NEXT:    [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4
3386; UNROLL-NEXT:    [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1
3387; UNROLL-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
3388; UNROLL-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
3389; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ugt i8 [[DOTPR_I]], -4
3390; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3391; UNROLL:       vector.ph:
3392; UNROLL-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP3]], 508
3393; UNROLL-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
3394; UNROLL-NEXT:    [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_CRD]]
3395; UNROLL-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> <i32 poison, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
3396; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
3397; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i64 0
3398; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
3399; UNROLL:       vector.body:
3400; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3401; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3402; UNROLL-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3403; UNROLL-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
3404; UNROLL:       middle.block:
3405; UNROLL-NEXT:    [[TMP6:%.*]] = and <2 x i32> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLATINSERT]]
3406; UNROLL-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[TMP6]], <2 x i32> poison, <2 x i32> zeroinitializer
3407; UNROLL-NEXT:    [[BIN_RDX:%.*]] = and <2 x i32> [[TMP7]], [[TMP4]]
3408; UNROLL-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]])
3409; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]]
3410; UNROLL-NEXT:    br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
3411; UNROLL:       scalar.ph:
3412; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ]
3413; UNROLL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[C_PROMOTED_I]], [[ENTRY]] ]
3414; UNROLL-NEXT:    br label [[COND_END_I:%.*]]
3415; UNROLL:       cond.end.i:
3416; UNROLL-NEXT:    [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ]
3417; UNROLL-NEXT:    [[TMP9:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]]
3418; UNROLL-NEXT:    [[INC_I]] = add i8 [[INC4_I]], 1
3419; UNROLL-NEXT:    [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
3420; UNROLL-NEXT:    br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
3421; UNROLL:       loopexit:
3422; UNROLL-NEXT:    [[AND_I_LCSSA:%.*]] = phi i32 [ [[TMP9]], [[COND_END_I]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
3423; UNROLL-NEXT:    ret i32 [[AND_I_LCSSA]]
3424;
3425; UNROLL-NO-IC-LABEL: @testoverflowcheck(
3426; UNROLL-NO-IC-NEXT:  entry:
3427; UNROLL-NO-IC-NEXT:    [[DOTPR_I:%.*]] = load i8, i8* @e, align 1
3428; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = load i32, i32* @d, align 4
3429; UNROLL-NO-IC-NEXT:    [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4
3430; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = sub i8 -1, [[DOTPR_I]]
3431; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
3432; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
3433; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP3]], 4
3434; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3435; UNROLL-NO-IC:       vector.ph:
3436; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP3]], 4
3437; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP3]], [[N_MOD_VF]]
3438; UNROLL-NO-IC-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
3439; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_CRD]]
3440; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> <i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i32 0
3441; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
3442; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
3443; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP0]], i32 0
3444; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
3445; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
3446; UNROLL-NO-IC:       vector.body:
3447; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3448; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP4]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
3449; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <2 x i32> [ <i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[VECTOR_BODY]] ]
3450; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = trunc i32 [[INDEX]] to i8
3451; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[DOTPR_I]], [[TMP5]]
3452; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i8 [[OFFSET_IDX]], 0
3453; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add i8 [[OFFSET_IDX]], 2
3454; UNROLL-NO-IC-NEXT:    [[TMP8]] = and <2 x i32> [[BROADCAST_SPLAT]], [[VEC_PHI]]
3455; UNROLL-NO-IC-NEXT:    [[TMP9]] = and <2 x i32> [[BROADCAST_SPLAT3]], [[VEC_PHI1]]
3456; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3457; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3458; UNROLL-NO-IC-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
3459; UNROLL-NO-IC:       middle.block:
3460; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = and <2 x i32> [[TMP9]], [[TMP8]]
3461; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = call i32 @llvm.vector.reduce.and.v2i32(<2 x i32> [[BIN_RDX]])
3462; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]]
3463; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
3464; UNROLL-NO-IC:       scalar.ph:
3465; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ]
3466; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[C_PROMOTED_I]], [[ENTRY]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
3467; UNROLL-NO-IC-NEXT:    br label [[COND_END_I:%.*]]
3468; UNROLL-NO-IC:       cond.end.i:
3469; UNROLL-NO-IC-NEXT:    [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ]
3470; UNROLL-NO-IC-NEXT:    [[AND3_I:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[AND_I:%.*]], [[COND_END_I]] ]
3471; UNROLL-NO-IC-NEXT:    [[AND_I]] = and i32 [[TMP0]], [[AND3_I]]
3472; UNROLL-NO-IC-NEXT:    [[INC_I]] = add i8 [[INC4_I]], 1
3473; UNROLL-NO-IC-NEXT:    [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
3474; UNROLL-NO-IC-NEXT:    br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
3475; UNROLL-NO-IC:       loopexit:
3476; UNROLL-NO-IC-NEXT:    [[AND_I_LCSSA:%.*]] = phi i32 [ [[AND_I]], [[COND_END_I]] ], [ [[TMP11]], [[MIDDLE_BLOCK]] ]
3477; UNROLL-NO-IC-NEXT:    ret i32 [[AND_I_LCSSA]]
3478;
3479; INTERLEAVE-LABEL: @testoverflowcheck(
3480; INTERLEAVE-NEXT:  entry:
3481; INTERLEAVE-NEXT:    [[DOTPR_I:%.*]] = load i8, i8* @e, align 1
3482; INTERLEAVE-NEXT:    [[TMP0:%.*]] = load i32, i32* @d, align 4
3483; INTERLEAVE-NEXT:    [[C_PROMOTED_I:%.*]] = load i32, i32* @c, align 4
3484; INTERLEAVE-NEXT:    [[TMP1:%.*]] = xor i8 [[DOTPR_I]], -1
3485; INTERLEAVE-NEXT:    [[TMP2:%.*]] = zext i8 [[TMP1]] to i32
3486; INTERLEAVE-NEXT:    [[TMP3:%.*]] = add nuw nsw i32 [[TMP2]], 1
3487; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ugt i8 [[DOTPR_I]], -8
3488; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3489; INTERLEAVE:       vector.ph:
3490; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP3]], 504
3491; INTERLEAVE-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
3492; INTERLEAVE-NEXT:    [[IND_END:%.*]] = add i8 [[DOTPR_I]], [[CAST_CRD]]
3493; INTERLEAVE-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> <i32 poison, i32 -1, i32 -1, i32 -1>, i32 [[C_PROMOTED_I]], i64 0
3494; INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
3495; INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP0]], i64 0
3496; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
3497; INTERLEAVE:       vector.body:
3498; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3499; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
3500; INTERLEAVE-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3501; INTERLEAVE-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP34:![0-9]+]]
3502; INTERLEAVE:       middle.block:
3503; INTERLEAVE-NEXT:    [[TMP6:%.*]] = and <4 x i32> [[BROADCAST_SPLATINSERT2]], [[BROADCAST_SPLATINSERT]]
3504; INTERLEAVE-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> poison, <4 x i32> zeroinitializer
3505; INTERLEAVE-NEXT:    [[BIN_RDX:%.*]] = and <4 x i32> [[TMP7]], [[TMP4]]
3506; INTERLEAVE-NEXT:    [[TMP8:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[BIN_RDX]])
3507; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP3]], [[N_VEC]]
3508; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[LOOPEXIT:%.*]], label [[SCALAR_PH]]
3509; INTERLEAVE:       scalar.ph:
3510; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[DOTPR_I]], [[ENTRY:%.*]] ]
3511; INTERLEAVE-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ [[C_PROMOTED_I]], [[ENTRY]] ]
3512; INTERLEAVE-NEXT:    br label [[COND_END_I:%.*]]
3513; INTERLEAVE:       cond.end.i:
3514; INTERLEAVE-NEXT:    [[INC4_I:%.*]] = phi i8 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC_I:%.*]], [[COND_END_I]] ]
3515; INTERLEAVE-NEXT:    [[TMP9:%.*]] = and i32 [[BC_MERGE_RDX]], [[TMP0]]
3516; INTERLEAVE-NEXT:    [[INC_I]] = add i8 [[INC4_I]], 1
3517; INTERLEAVE-NEXT:    [[TOBOOL_I:%.*]] = icmp eq i8 [[INC_I]], 0
3518; INTERLEAVE-NEXT:    br i1 [[TOBOOL_I]], label [[LOOPEXIT]], label [[COND_END_I]], !llvm.loop [[LOOP35:![0-9]+]]
3519; INTERLEAVE:       loopexit:
3520; INTERLEAVE-NEXT:    [[AND_I_LCSSA:%.*]] = phi i32 [ [[TMP9]], [[COND_END_I]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
3521; INTERLEAVE-NEXT:    ret i32 [[AND_I_LCSSA]]
3522;
; Input IR starts here. All three loads happen before the loop is entered.
3523entry:
; Induction start value (i8), only known at run time.
3524  %.pr.i = load i8, i8* @e, align 1
; Operand AND-ed into the reduction on every iteration.
3525  %0 = load i32, i32* @d, align 4
; Initial value of the AND reduction.
3526  %c.promoted.i = load i32, i32* @c, align 4
3527  br label %cond.end.i
3528
3529cond.end.i:
; i8 induction variable: loaded start value on entry, otherwise %inc.i.
3530  %inc4.i = phi i8 [ %.pr.i, %entry ], [ %inc.i, %cond.end.i ]
; Running AND value: loaded start value on entry, otherwise %and.i.
3531  %and3.i = phi i32 [ %c.promoted.i, %entry ], [ %and.i, %cond.end.i ]
3532  %and.i = and i32 %0, %and3.i
3533  %inc.i = add i8 %inc4.i, 1
; Exit once the incremented i8 induction value equals 0.
3534  %tobool.i = icmp eq i8 %inc.i, 0
3535  br i1 %tobool.i, label %loopexit, label %cond.end.i
3536
3537loopexit:
; Return the AND result computed in the final iteration.
3538  ret i32 %and.i
3539}
3540
3541; The SCEV expression of %sphi is (zext i8 {%t,+,1}<%loop> to i32)
3542; In order to recognize %sphi as an induction PHI and vectorize this loop,
3543; we need to convert the SCEV expression into an AddRecExpr.
3544; The expression gets converted to {zext i8 %t to i32,+,1}.
3545
3546define void @wrappingindvars1(i8 %t, i32 %len, i32 *%A) {
3547; CHECK-LABEL: @wrappingindvars1(
3548; CHECK-NEXT:  entry:
3549; CHECK-NEXT:    [[ST:%.*]] = zext i8 [[T:%.*]] to i16
3550; CHECK-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i32
3551; CHECK-NEXT:    [[ECMP:%.*]] = icmp ult i16 [[ST]], 42
3552; CHECK-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
3553; CHECK:       loop.preheader:
3554; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
3555; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
3556; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
3557; CHECK:       vector.scevcheck:
3558; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
3559; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
3560; CHECK-NEXT:    [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
3561; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
3562; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
3563; CHECK-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
3564; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
3565; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
3566; CHECK-NEXT:    [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
3567; CHECK-NEXT:    [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
3568; CHECK-NEXT:    [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
3569; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
3570; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
3571; CHECK-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
3572; CHECK-NEXT:    [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
3573; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
3574; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
3575; CHECK-NEXT:    br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3576; CHECK:       vector.ph:
3577; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
3578; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
3579; CHECK-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
3580; CHECK-NEXT:    [[IND_END:%.*]] = add i8 [[T]], [[CAST_CRD]]
3581; CHECK-NEXT:    [[IND_END3:%.*]] = add i32 [[EXT]], [[N_VEC]]
3582; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[T]], i32 0
3583; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
3584; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], <i8 0, i8 1>
3585; CHECK-NEXT:    [[DOTSPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i32 0
3586; CHECK-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer
3587; CHECK-NEXT:    [[INDUCTION6:%.*]] = add <2 x i32> [[DOTSPLAT5]], <i32 0, i32 1>
3588; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
3589; CHECK:       vector.body:
3590; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3591; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3592; CHECK-NEXT:    [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VECTOR_BODY]] ]
3593; CHECK-NEXT:    [[TMP19:%.*]] = trunc i32 [[INDEX]] to i8
3594; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP19]]
3595; CHECK-NEXT:    [[TMP20:%.*]] = add i8 [[OFFSET_IDX]], 0
3596; CHECK-NEXT:    [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 1
3597; CHECK-NEXT:    [[TMP22:%.*]] = add i32 [[INDEX]], 0
3598; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP20]]
3599; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0
3600; CHECK-NEXT:    [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <2 x i32>*
3601; CHECK-NEXT:    store <2 x i32> [[VEC_IND7]], <2 x i32>* [[TMP25]], align 4
3602; CHECK-NEXT:    [[TMP26:%.*]] = add <2 x i8> [[VEC_IND]], <i8 1, i8 1>
3603; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3604; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], <i8 2, i8 2>
3605; CHECK-NEXT:    [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], <i32 2, i32 2>
3606; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3607; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
3608; CHECK:       middle.block:
3609; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
3610; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
3611; CHECK:       scalar.ph:
3612; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
3613; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
3614; CHECK-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
3615; CHECK-NEXT:    br label [[LOOP:%.*]]
3616; CHECK:       loop:
3617; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
3618; CHECK-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
3619; CHECK-NEXT:    [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
3620; CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[IDX]]
3621; CHECK-NEXT:    store i32 [[SPHI]], i32* [[PTR]], align 4
3622; CHECK-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
3623; CHECK-NEXT:    [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
3624; CHECK-NEXT:    [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
3625; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
3626; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
3627; CHECK:       exit.loopexit:
3628; CHECK-NEXT:    br label [[EXIT]]
3629; CHECK:       exit:
3630; CHECK-NEXT:    ret void
3631;
3632; IND-LABEL: @wrappingindvars1(
3633; IND-NEXT:  entry:
3634; IND-NEXT:    [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
3635; IND-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T]], 42
3636; IND-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
3637; IND:       loop.preheader:
3638; IND-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
3639; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
3640; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
3641; IND:       vector.scevcheck:
3642; IND-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
3643; IND-NEXT:    [[TMP2:%.*]] = xor i8 [[T]], -1
3644; IND-NEXT:    [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
3645; IND-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
3646; IND-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
3647; IND-NEXT:    [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
3648; IND-NEXT:    [[TMP7:%.*]] = add i8 [[TMP6]], [[T]]
3649; IND-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
3650; IND-NEXT:    [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
3651; IND-NEXT:    [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
3652; IND-NEXT:    [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
3653; IND-NEXT:    br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3654; IND:       vector.ph:
3655; IND-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP0]], -2
3656; IND-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
3657; IND-NEXT:    [[IND_END:%.*]] = add i8 [[CAST_CRD]], [[T]]
3658; IND-NEXT:    [[IND_END3:%.*]] = add i32 [[N_VEC]], [[EXT]]
3659; IND-NEXT:    [[DOTSPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i64 0
3660; IND-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer
3661; IND-NEXT:    [[INDUCTION6:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT5]], <i32 0, i32 1>
3662; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
3663; IND:       vector.body:
3664; IND-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3665; IND-NEXT:    [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VECTOR_BODY]] ]
3666; IND-NEXT:    [[TMP12:%.*]] = trunc i32 [[INDEX]] to i8
3667; IND-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[TMP12]], [[T]]
3668; IND-NEXT:    [[TMP13:%.*]] = sext i8 [[OFFSET_IDX]] to i64
3669; IND-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP13]]
3670; IND-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <2 x i32>*
3671; IND-NEXT:    store <2 x i32> [[VEC_IND7]], <2 x i32>* [[TMP15]], align 4
3672; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3673; IND-NEXT:    [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], <i32 2, i32 2>
3674; IND-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3675; IND-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
3676; IND:       middle.block:
3677; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
3678; IND-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
3679; IND:       scalar.ph:
3680; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
3681; IND-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
3682; IND-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
3683; IND-NEXT:    br label [[LOOP:%.*]]
3684; IND:       loop:
3685; IND-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
3686; IND-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
3687; IND-NEXT:    [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
3688; IND-NEXT:    [[TMP17:%.*]] = sext i8 [[IDX]] to i64
3689; IND-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]]
3690; IND-NEXT:    store i32 [[SPHI]], i32* [[PTR]], align 4
3691; IND-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
3692; IND-NEXT:    [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
3693; IND-NEXT:    [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
3694; IND-NEXT:    [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
3695; IND-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
3696; IND:       exit.loopexit:
3697; IND-NEXT:    br label [[EXIT]]
3698; IND:       exit:
3699; IND-NEXT:    ret void
3700;
3701; UNROLL-LABEL: @wrappingindvars1(
3702; UNROLL-NEXT:  entry:
3703; UNROLL-NEXT:    [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
3704; UNROLL-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T]], 42
3705; UNROLL-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
3706; UNROLL:       loop.preheader:
3707; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
3708; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
3709; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
3710; UNROLL:       vector.scevcheck:
3711; UNROLL-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
3712; UNROLL-NEXT:    [[TMP2:%.*]] = xor i8 [[T]], -1
3713; UNROLL-NEXT:    [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
3714; UNROLL-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
3715; UNROLL-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
3716; UNROLL-NEXT:    [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
3717; UNROLL-NEXT:    [[TMP7:%.*]] = add i8 [[TMP6]], [[T]]
3718; UNROLL-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
3719; UNROLL-NEXT:    [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
3720; UNROLL-NEXT:    [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
3721; UNROLL-NEXT:    [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
3722; UNROLL-NEXT:    br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3723; UNROLL:       vector.ph:
3724; UNROLL-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP0]], -4
3725; UNROLL-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
3726; UNROLL-NEXT:    [[IND_END:%.*]] = add i8 [[CAST_CRD]], [[T]]
3727; UNROLL-NEXT:    [[IND_END3:%.*]] = add i32 [[N_VEC]], [[EXT]]
3728; UNROLL-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i64 0
3729; UNROLL-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT5]], <2 x i32> poison, <2 x i32> zeroinitializer
3730; UNROLL-NEXT:    [[INDUCTION7:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT6]], <i32 0, i32 1>
3731; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
3732; UNROLL:       vector.body:
3733; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3734; UNROLL-NEXT:    [[VEC_IND8:%.*]] = phi <2 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ]
3735; UNROLL-NEXT:    [[TMP12:%.*]] = trunc i32 [[INDEX]] to i8
3736; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[TMP12]], [[T]]
3737; UNROLL-NEXT:    [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], <i32 2, i32 2>
3738; UNROLL-NEXT:    [[TMP13:%.*]] = sext i8 [[OFFSET_IDX]] to i64
3739; UNROLL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP13]]
3740; UNROLL-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <2 x i32>*
3741; UNROLL-NEXT:    store <2 x i32> [[VEC_IND8]], <2 x i32>* [[TMP15]], align 4
3742; UNROLL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i64 2
3743; UNROLL-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <2 x i32>*
3744; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD9]], <2 x i32>* [[TMP17]], align 4
3745; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3746; UNROLL-NEXT:    [[VEC_IND_NEXT11]] = add <2 x i32> [[VEC_IND8]], <i32 4, i32 4>
3747; UNROLL-NEXT:    [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3748; UNROLL-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
3749; UNROLL:       middle.block:
3750; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
3751; UNROLL-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
3752; UNROLL:       scalar.ph:
3753; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
3754; UNROLL-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
3755; UNROLL-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
3756; UNROLL-NEXT:    br label [[LOOP:%.*]]
3757; UNROLL:       loop:
3758; UNROLL-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
3759; UNROLL-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
3760; UNROLL-NEXT:    [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
3761; UNROLL-NEXT:    [[TMP19:%.*]] = sext i8 [[IDX]] to i64
3762; UNROLL-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
3763; UNROLL-NEXT:    store i32 [[SPHI]], i32* [[PTR]], align 4
3764; UNROLL-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
3765; UNROLL-NEXT:    [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
3766; UNROLL-NEXT:    [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
3767; UNROLL-NEXT:    [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
3768; UNROLL-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
3769; UNROLL:       exit.loopexit:
3770; UNROLL-NEXT:    br label [[EXIT]]
3771; UNROLL:       exit:
3772; UNROLL-NEXT:    ret void
3773;
3774; UNROLL-NO-IC-LABEL: @wrappingindvars1(
3775; UNROLL-NO-IC-NEXT:  entry:
3776; UNROLL-NO-IC-NEXT:    [[ST:%.*]] = zext i8 [[T:%.*]] to i16
3777; UNROLL-NO-IC-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i32
3778; UNROLL-NO-IC-NEXT:    [[ECMP:%.*]] = icmp ult i16 [[ST]], 42
3779; UNROLL-NO-IC-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
3780; UNROLL-NO-IC:       loop.preheader:
3781; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
3782; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
3783; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
3784; UNROLL-NO-IC:       vector.scevcheck:
3785; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
3786; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
3787; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
3788; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
3789; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
3790; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
3791; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
3792; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
3793; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
3794; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
3795; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
3796; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
3797; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
3798; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
3799; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
3800; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
3801; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
3802; UNROLL-NO-IC-NEXT:    br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3803; UNROLL-NO-IC:       vector.ph:
3804; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
3805; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
3806; UNROLL-NO-IC-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
3807; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i8 [[T]], [[CAST_CRD]]
3808; UNROLL-NO-IC-NEXT:    [[IND_END3:%.*]] = add i32 [[EXT]], [[N_VEC]]
3809; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[T]], i32 0
3810; UNROLL-NO-IC-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
3811; UNROLL-NO-IC-NEXT:    [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], <i8 0, i8 1>
3812; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <2 x i32> poison, i32 [[EXT]], i32 0
3813; UNROLL-NO-IC-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT5]], <2 x i32> poison, <2 x i32> zeroinitializer
3814; UNROLL-NO-IC-NEXT:    [[INDUCTION7:%.*]] = add <2 x i32> [[DOTSPLAT6]], <i32 0, i32 1>
3815; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
3816; UNROLL-NO-IC:       vector.body:
3817; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3818; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3819; UNROLL-NO-IC-NEXT:    [[VEC_IND8:%.*]] = phi <2 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ]
3820; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], <i8 2, i8 2>
3821; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = trunc i32 [[INDEX]] to i8
3822; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP19]]
3823; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = add i8 [[OFFSET_IDX]], 0
3824; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 1
3825; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 2
3826; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], 3
3827; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = add i32 [[INDEX]], 0
3828; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = add i32 [[INDEX]], 2
3829; UNROLL-NO-IC-NEXT:    [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], <i32 2, i32 2>
3830; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP20]]
3831; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[TMP22]]
3832; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 0
3833; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <2 x i32>*
3834; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND8]], <2 x i32>* [[TMP29]], align 4
3835; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 2
3836; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = bitcast i32* [[TMP30]] to <2 x i32>*
3837; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD9]], <2 x i32>* [[TMP31]], align 4
3838; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = add <2 x i8> [[VEC_IND]], <i8 1, i8 1>
3839; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = add <2 x i8> [[STEP_ADD]], <i8 1, i8 1>
3840; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3841; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], <i8 2, i8 2>
3842; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT11]] = add <2 x i32> [[STEP_ADD9]], <i32 2, i32 2>
3843; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3844; UNROLL-NO-IC-NEXT:    br i1 [[TMP34]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
3845; UNROLL-NO-IC:       middle.block:
3846; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
3847; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
3848; UNROLL-NO-IC:       scalar.ph:
3849; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
3850; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
3851; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
3852; UNROLL-NO-IC-NEXT:    br label [[LOOP:%.*]]
3853; UNROLL-NO-IC:       loop:
3854; UNROLL-NO-IC-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
3855; UNROLL-NO-IC-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
3856; UNROLL-NO-IC-NEXT:    [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
3857; UNROLL-NO-IC-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[IDX]]
3858; UNROLL-NO-IC-NEXT:    store i32 [[SPHI]], i32* [[PTR]], align 4
3859; UNROLL-NO-IC-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
3860; UNROLL-NO-IC-NEXT:    [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
3861; UNROLL-NO-IC-NEXT:    [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
3862; UNROLL-NO-IC-NEXT:    [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
3863; UNROLL-NO-IC-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
3864; UNROLL-NO-IC:       exit.loopexit:
3865; UNROLL-NO-IC-NEXT:    br label [[EXIT]]
3866; UNROLL-NO-IC:       exit:
3867; UNROLL-NO-IC-NEXT:    ret void
3868;
3869; INTERLEAVE-LABEL: @wrappingindvars1(
3870; INTERLEAVE-NEXT:  entry:
3871; INTERLEAVE-NEXT:    [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
3872; INTERLEAVE-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T]], 42
3873; INTERLEAVE-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
3874; INTERLEAVE:       loop.preheader:
3875; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
3876; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
3877; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
3878; INTERLEAVE:       vector.scevcheck:
3879; INTERLEAVE-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
3880; INTERLEAVE-NEXT:    [[TMP2:%.*]] = xor i8 [[T]], -1
3881; INTERLEAVE-NEXT:    [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
3882; INTERLEAVE-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
3883; INTERLEAVE-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
3884; INTERLEAVE-NEXT:    [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
3885; INTERLEAVE-NEXT:    [[TMP7:%.*]] = add i8 [[TMP6]], [[T]]
3886; INTERLEAVE-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
3887; INTERLEAVE-NEXT:    [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
3888; INTERLEAVE-NEXT:    [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
3889; INTERLEAVE-NEXT:    [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
3890; INTERLEAVE-NEXT:    br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3891; INTERLEAVE:       vector.ph:
3892; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP0]], -8
3893; INTERLEAVE-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
3894; INTERLEAVE-NEXT:    [[IND_END:%.*]] = add i8 [[CAST_CRD]], [[T]]
3895; INTERLEAVE-NEXT:    [[IND_END3:%.*]] = add i32 [[N_VEC]], [[EXT]]
3896; INTERLEAVE-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[EXT]], i64 0
3897; INTERLEAVE-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer
3898; INTERLEAVE-NEXT:    [[INDUCTION7:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT6]], <i32 0, i32 1, i32 2, i32 3>
3899; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
3900; INTERLEAVE:       vector.body:
3901; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3902; INTERLEAVE-NEXT:    [[VEC_IND8:%.*]] = phi <4 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ]
3903; INTERLEAVE-NEXT:    [[TMP12:%.*]] = trunc i32 [[INDEX]] to i8
3904; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[TMP12]], [[T]]
3905; INTERLEAVE-NEXT:    [[STEP_ADD9:%.*]] = add <4 x i32> [[VEC_IND8]], <i32 4, i32 4, i32 4, i32 4>
3906; INTERLEAVE-NEXT:    [[TMP13:%.*]] = sext i8 [[OFFSET_IDX]] to i64
3907; INTERLEAVE-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP13]]
3908; INTERLEAVE-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>*
3909; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND8]], <4 x i32>* [[TMP15]], align 4
3910; INTERLEAVE-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i64 4
3911; INTERLEAVE-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
3912; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD9]], <4 x i32>* [[TMP17]], align 4
3913; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
3914; INTERLEAVE-NEXT:    [[VEC_IND_NEXT11]] = add <4 x i32> [[VEC_IND8]], <i32 8, i32 8, i32 8, i32 8>
3915; INTERLEAVE-NEXT:    [[TMP18:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
3916; INTERLEAVE-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP36:![0-9]+]]
3917; INTERLEAVE:       middle.block:
3918; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
3919; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
3920; INTERLEAVE:       scalar.ph:
3921; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
3922; INTERLEAVE-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
3923; INTERLEAVE-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi i32 [ [[IND_END3]], [[MIDDLE_BLOCK]] ], [ [[EXT]], [[LOOP_PREHEADER]] ], [ [[EXT]], [[VECTOR_SCEVCHECK]] ]
3924; INTERLEAVE-NEXT:    br label [[LOOP:%.*]]
3925; INTERLEAVE:       loop:
3926; INTERLEAVE-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
3927; INTERLEAVE-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
3928; INTERLEAVE-NEXT:    [[SPHI:%.*]] = phi i32 [ [[IDX_INC_EXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ]
3929; INTERLEAVE-NEXT:    [[TMP19:%.*]] = sext i8 [[IDX]] to i64
3930; INTERLEAVE-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP19]]
3931; INTERLEAVE-NEXT:    store i32 [[SPHI]], i32* [[PTR]], align 4
3932; INTERLEAVE-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
3933; INTERLEAVE-NEXT:    [[IDX_INC_EXT]] = zext i8 [[IDX_INC]] to i32
3934; INTERLEAVE-NEXT:    [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
3935; INTERLEAVE-NEXT:    [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
3936; INTERLEAVE-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP37:![0-9]+]]
3937; INTERLEAVE:       exit.loopexit:
3938; INTERLEAVE-NEXT:    br label [[EXIT]]
3939; INTERLEAVE:       exit:
3940; INTERLEAVE-NEXT:    ret void
3941;
3942  entry:
3943  %st = zext i8 %t to i16
3944  %ext = zext i8 %t to i32
3945  %ecmp = icmp ult i16 %st, 42
3946  br i1 %ecmp, label %loop, label %exit
3947
3948  loop:
3949
3950  %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
3951  %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]
3952  %sphi = phi i32 [ %ext, %entry ], [%idx.inc.ext, %loop]
3953
3954  %ptr = getelementptr inbounds i32, i32* %A, i8 %idx
3955  store i32 %sphi, i32* %ptr
3956
3957  %idx.inc = add i8 %idx, 1
3958  %idx.inc.ext = zext i8 %idx.inc to i32
3959  %idx.b.inc = add nuw nsw i32 %idx.b, 1
3960
3961  %c = icmp ult i32 %idx.b, %len
3962  br i1 %c, label %loop, label %exit
3963
3964  exit:
3965  ret void
3966}
3967
3968; The SCEV expression of %sphi is (4 * (zext i8 {%t,+,1}<%loop> to i32)).
3969; In order to recognize %sphi as an induction PHI and vectorize this loop,
3970; we need to convert the SCEV expression into an AddRecExpr.
3971; The expression gets converted to ({4 * (zext i8 %t to i32),+,4}).
3972define void @wrappingindvars2(i8 %t, i32 %len, i32 *%A) {
3973; CHECK-LABEL: @wrappingindvars2(
3974; CHECK-NEXT:  entry:
3975; CHECK-NEXT:    [[ST:%.*]] = zext i8 [[T:%.*]] to i16
3976; CHECK-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i32
3977; CHECK-NEXT:    [[EXT_MUL:%.*]] = mul i32 [[EXT]], 4
3978; CHECK-NEXT:    [[ECMP:%.*]] = icmp ult i16 [[ST]], 42
3979; CHECK-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
3980; CHECK:       loop.preheader:
3981; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
3982; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
3983; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
3984; CHECK:       vector.scevcheck:
3985; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
3986; CHECK-NEXT:    [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
3987; CHECK-NEXT:    [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
3988; CHECK-NEXT:    [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
3989; CHECK-NEXT:    [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
3990; CHECK-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
3991; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
3992; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
3993; CHECK-NEXT:    [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
3994; CHECK-NEXT:    [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
3995; CHECK-NEXT:    [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
3996; CHECK-NEXT:    [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
3997; CHECK-NEXT:    [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
3998; CHECK-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
3999; CHECK-NEXT:    [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
4000; CHECK-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
4001; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
4002; CHECK-NEXT:    br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
4003; CHECK:       vector.ph:
4004; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
4005; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
4006; CHECK-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
4007; CHECK-NEXT:    [[IND_END:%.*]] = add i8 [[T]], [[CAST_CRD]]
4008; CHECK-NEXT:    [[TMP19:%.*]] = mul i32 [[N_VEC]], 4
4009; CHECK-NEXT:    [[IND_END2:%.*]] = add i32 [[EXT_MUL]], [[TMP19]]
4010; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[T]], i32 0
4011; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
4012; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], <i8 0, i8 1>
4013; CHECK-NEXT:    [[DOTSPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i32 0
4014; CHECK-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer
4015; CHECK-NEXT:    [[INDUCTION6:%.*]] = add <2 x i32> [[DOTSPLAT5]], <i32 0, i32 4>
4016; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
4017; CHECK:       vector.body:
4018; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4019; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4020; CHECK-NEXT:    [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VECTOR_BODY]] ]
4021; CHECK-NEXT:    [[TMP20:%.*]] = trunc i32 [[INDEX]] to i8
4022; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP20]]
4023; CHECK-NEXT:    [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 0
4024; CHECK-NEXT:    [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 1
4025; CHECK-NEXT:    [[TMP23:%.*]] = add i32 [[INDEX]], 0
4026; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP21]]
4027; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i32 0
4028; CHECK-NEXT:    [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <2 x i32>*
4029; CHECK-NEXT:    store <2 x i32> [[VEC_IND7]], <2 x i32>* [[TMP26]], align 4
4030; CHECK-NEXT:    [[TMP27:%.*]] = add <2 x i8> [[VEC_IND]], <i8 1, i8 1>
4031; CHECK-NEXT:    [[TMP28:%.*]] = zext <2 x i8> [[TMP27]] to <2 x i32>
4032; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4033; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i8> [[VEC_IND]], <i8 2, i8 2>
4034; CHECK-NEXT:    [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], <i32 8, i32 8>
4035; CHECK-NEXT:    [[TMP29:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4036; CHECK-NEXT:    br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
4037; CHECK:       middle.block:
4038; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4039; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
4040; CHECK:       scalar.ph:
4041; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
4042; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
4043; CHECK-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4044; CHECK-NEXT:    br label [[LOOP:%.*]]
4045; CHECK:       loop:
4046; CHECK-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4047; CHECK-NEXT:    [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
4048; CHECK-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
4049; CHECK-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[IDX]]
4050; CHECK-NEXT:    store i32 [[SPHI]], i32* [[PTR]], align 4
4051; CHECK-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
4052; CHECK-NEXT:    [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
4053; CHECK-NEXT:    [[MUL]] = mul i32 [[IDX_INC_EXT]], 4
4054; CHECK-NEXT:    [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
4055; CHECK-NEXT:    [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
4056; CHECK-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
4057; CHECK:       exit.loopexit:
4058; CHECK-NEXT:    br label [[EXIT]]
4059; CHECK:       exit:
4060; CHECK-NEXT:    ret void
4061;
4062; IND-LABEL: @wrappingindvars2(
4063; IND-NEXT:  entry:
4064; IND-NEXT:    [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
4065; IND-NEXT:    [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2
4066; IND-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T]], 42
4067; IND-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
4068; IND:       loop.preheader:
4069; IND-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
4070; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
4071; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4072; IND:       vector.scevcheck:
4073; IND-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
4074; IND-NEXT:    [[TMP2:%.*]] = xor i8 [[T]], -1
4075; IND-NEXT:    [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
4076; IND-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
4077; IND-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
4078; IND-NEXT:    [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
4079; IND-NEXT:    [[TMP7:%.*]] = add i8 [[TMP6]], [[T]]
4080; IND-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
4081; IND-NEXT:    [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
4082; IND-NEXT:    [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
4083; IND-NEXT:    [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
4084; IND-NEXT:    br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
4085; IND:       vector.ph:
4086; IND-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP0]], -2
4087; IND-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
4088; IND-NEXT:    [[IND_END:%.*]] = add i8 [[CAST_CRD]], [[T]]
4089; IND-NEXT:    [[TMP12:%.*]] = add i32 [[N_VEC]], [[EXT]]
4090; IND-NEXT:    [[IND_END2:%.*]] = shl i32 [[TMP12]], 2
4091; IND-NEXT:    [[DOTSPLATINSERT4:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0
4092; IND-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT4]], <2 x i32> poison, <2 x i32> zeroinitializer
4093; IND-NEXT:    [[INDUCTION6:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT5]], <i32 0, i32 4>
4094; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
4095; IND:       vector.body:
4096; IND-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4097; IND-NEXT:    [[VEC_IND7:%.*]] = phi <2 x i32> [ [[INDUCTION6]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT8:%.*]], [[VECTOR_BODY]] ]
4098; IND-NEXT:    [[TMP13:%.*]] = trunc i32 [[INDEX]] to i8
4099; IND-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[TMP13]], [[T]]
4100; IND-NEXT:    [[TMP14:%.*]] = sext i8 [[OFFSET_IDX]] to i64
4101; IND-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP14]]
4102; IND-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <2 x i32>*
4103; IND-NEXT:    store <2 x i32> [[VEC_IND7]], <2 x i32>* [[TMP16]], align 4
4104; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4105; IND-NEXT:    [[VEC_IND_NEXT8]] = add <2 x i32> [[VEC_IND7]], <i32 8, i32 8>
4106; IND-NEXT:    [[TMP17:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4107; IND-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
4108; IND:       middle.block:
4109; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4110; IND-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
4111; IND:       scalar.ph:
4112; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
4113; IND-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
4114; IND-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4115; IND-NEXT:    br label [[LOOP:%.*]]
4116; IND:       loop:
4117; IND-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4118; IND-NEXT:    [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
4119; IND-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
4120; IND-NEXT:    [[TMP18:%.*]] = sext i8 [[IDX]] to i64
4121; IND-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
4122; IND-NEXT:    store i32 [[SPHI]], i32* [[PTR]], align 4
4123; IND-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
4124; IND-NEXT:    [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
4125; IND-NEXT:    [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2
4126; IND-NEXT:    [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
4127; IND-NEXT:    [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
4128; IND-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
4129; IND:       exit.loopexit:
4130; IND-NEXT:    br label [[EXIT]]
4131; IND:       exit:
4132; IND-NEXT:    ret void
4133;
4134; UNROLL-LABEL: @wrappingindvars2(
4135; UNROLL-NEXT:  entry:
4136; UNROLL-NEXT:    [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
4137; UNROLL-NEXT:    [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2
4138; UNROLL-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T]], 42
4139; UNROLL-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
4140; UNROLL:       loop.preheader:
4141; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
4142; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
4143; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4144; UNROLL:       vector.scevcheck:
4145; UNROLL-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
4146; UNROLL-NEXT:    [[TMP2:%.*]] = xor i8 [[T]], -1
4147; UNROLL-NEXT:    [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
4148; UNROLL-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
4149; UNROLL-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
4150; UNROLL-NEXT:    [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
4151; UNROLL-NEXT:    [[TMP7:%.*]] = add i8 [[TMP6]], [[T]]
4152; UNROLL-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
4153; UNROLL-NEXT:    [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
4154; UNROLL-NEXT:    [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
4155; UNROLL-NEXT:    [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
4156; UNROLL-NEXT:    br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
4157; UNROLL:       vector.ph:
4158; UNROLL-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP0]], -4
4159; UNROLL-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
4160; UNROLL-NEXT:    [[IND_END:%.*]] = add i8 [[CAST_CRD]], [[T]]
4161; UNROLL-NEXT:    [[TMP12:%.*]] = add i32 [[N_VEC]], [[EXT]]
4162; UNROLL-NEXT:    [[IND_END2:%.*]] = shl i32 [[TMP12]], 2
4163; UNROLL-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i64 0
4164; UNROLL-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT5]], <2 x i32> poison, <2 x i32> zeroinitializer
4165; UNROLL-NEXT:    [[INDUCTION7:%.*]] = add nuw nsw <2 x i32> [[DOTSPLAT6]], <i32 0, i32 4>
4166; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
4167; UNROLL:       vector.body:
4168; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4169; UNROLL-NEXT:    [[VEC_IND8:%.*]] = phi <2 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ]
4170; UNROLL-NEXT:    [[TMP13:%.*]] = trunc i32 [[INDEX]] to i8
4171; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[TMP13]], [[T]]
4172; UNROLL-NEXT:    [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], <i32 8, i32 8>
4173; UNROLL-NEXT:    [[TMP14:%.*]] = sext i8 [[OFFSET_IDX]] to i64
4174; UNROLL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP14]]
4175; UNROLL-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <2 x i32>*
4176; UNROLL-NEXT:    store <2 x i32> [[VEC_IND8]], <2 x i32>* [[TMP16]], align 4
4177; UNROLL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i64 2
4178; UNROLL-NEXT:    [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <2 x i32>*
4179; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD9]], <2 x i32>* [[TMP18]], align 4
4180; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
4181; UNROLL-NEXT:    [[VEC_IND_NEXT11]] = add <2 x i32> [[VEC_IND8]], <i32 16, i32 16>
4182; UNROLL-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4183; UNROLL-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
4184; UNROLL:       middle.block:
4185; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4186; UNROLL-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
4187; UNROLL:       scalar.ph:
4188; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
4189; UNROLL-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
4190; UNROLL-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4191; UNROLL-NEXT:    br label [[LOOP:%.*]]
4192; UNROLL:       loop:
4193; UNROLL-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4194; UNROLL-NEXT:    [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
4195; UNROLL-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
4196; UNROLL-NEXT:    [[TMP20:%.*]] = sext i8 [[IDX]] to i64
4197; UNROLL-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]]
4198; UNROLL-NEXT:    store i32 [[SPHI]], i32* [[PTR]], align 4
4199; UNROLL-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
4200; UNROLL-NEXT:    [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
4201; UNROLL-NEXT:    [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2
4202; UNROLL-NEXT:    [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
4203; UNROLL-NEXT:    [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
4204; UNROLL-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
4205; UNROLL:       exit.loopexit:
4206; UNROLL-NEXT:    br label [[EXIT]]
4207; UNROLL:       exit:
4208; UNROLL-NEXT:    ret void
4209;
4210; UNROLL-NO-IC-LABEL: @wrappingindvars2(
4211; UNROLL-NO-IC-NEXT:  entry:
4212; UNROLL-NO-IC-NEXT:    [[ST:%.*]] = zext i8 [[T:%.*]] to i16
4213; UNROLL-NO-IC-NEXT:    [[EXT:%.*]] = zext i8 [[T]] to i32
4214; UNROLL-NO-IC-NEXT:    [[EXT_MUL:%.*]] = mul i32 [[EXT]], 4
4215; UNROLL-NO-IC-NEXT:    [[ECMP:%.*]] = icmp ult i16 [[ST]], 42
4216; UNROLL-NO-IC-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
4217; UNROLL-NO-IC:       loop.preheader:
4218; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
4219; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
4220; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4221; UNROLL-NO-IC:       vector.scevcheck:
4222; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
4223; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i8 [[T]], [[TMP1]]
4224; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = sub i8 [[T]], [[TMP1]]
4225; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = icmp ugt i8 [[TMP3]], [[T]]
4226; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = icmp ult i8 [[TMP2]], [[T]]
4227; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
4228; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = icmp ugt i32 [[LEN]], 255
4229; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
4230; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = trunc i32 [[LEN]] to i8
4231; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = add i8 [[T]], [[TMP10]]
4232; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sub i8 [[T]], [[TMP10]]
4233; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = icmp sgt i8 [[TMP12]], [[T]]
4234; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = icmp slt i8 [[TMP11]], [[T]]
4235; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = select i1 false, i1 [[TMP13]], i1 [[TMP14]]
4236; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = icmp ugt i32 [[LEN]], 255
4237; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = or i1 [[TMP15]], [[TMP16]]
4238; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = or i1 [[TMP8]], [[TMP17]]
4239; UNROLL-NO-IC-NEXT:    br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
4240; UNROLL-NO-IC:       vector.ph:
4241; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
4242; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
4243; UNROLL-NO-IC-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
4244; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i8 [[T]], [[CAST_CRD]]
4245; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = mul i32 [[N_VEC]], 4
4246; UNROLL-NO-IC-NEXT:    [[IND_END2:%.*]] = add i32 [[EXT_MUL]], [[TMP19]]
4247; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i8> poison, i8 [[T]], i32 0
4248; UNROLL-NO-IC-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i8> [[DOTSPLATINSERT]], <2 x i8> poison, <2 x i32> zeroinitializer
4249; UNROLL-NO-IC-NEXT:    [[INDUCTION:%.*]] = add <2 x i8> [[DOTSPLAT]], <i8 0, i8 1>
4250; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <2 x i32> poison, i32 [[EXT_MUL]], i32 0
4251; UNROLL-NO-IC-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT5]], <2 x i32> poison, <2 x i32> zeroinitializer
4252; UNROLL-NO-IC-NEXT:    [[INDUCTION7:%.*]] = add <2 x i32> [[DOTSPLAT6]], <i32 0, i32 4>
4253; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
4254; UNROLL-NO-IC:       vector.body:
4255; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4256; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i8> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4257; UNROLL-NO-IC-NEXT:    [[VEC_IND8:%.*]] = phi <2 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ]
4258; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i8> [[VEC_IND]], <i8 2, i8 2>
4259; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = trunc i32 [[INDEX]] to i8
4260; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[T]], [[TMP20]]
4261; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = add i8 [[OFFSET_IDX]], 0
4262; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = add i8 [[OFFSET_IDX]], 1
4263; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = add i8 [[OFFSET_IDX]], 2
4264; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = add i8 [[OFFSET_IDX]], 3
4265; UNROLL-NO-IC-NEXT:    [[STEP_ADD9:%.*]] = add <2 x i32> [[VEC_IND8]], <i32 8, i32 8>
4266; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = add i32 [[INDEX]], 0
4267; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = add i32 [[INDEX]], 2
4268; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i8 [[TMP21]]
4269; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[TMP23]]
4270; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 0
4271; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = bitcast i32* [[TMP29]] to <2 x i32>*
4272; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND8]], <2 x i32>* [[TMP30]], align 4
4273; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 2
4274; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = bitcast i32* [[TMP31]] to <2 x i32>*
4275; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD9]], <2 x i32>* [[TMP32]], align 4
4276; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = add <2 x i8> [[VEC_IND]], <i8 1, i8 1>
4277; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = add <2 x i8> [[STEP_ADD]], <i8 1, i8 1>
4278; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = zext <2 x i8> [[TMP33]] to <2 x i32>
4279; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = zext <2 x i8> [[TMP34]] to <2 x i32>
4280; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
4281; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i8> [[STEP_ADD]], <i8 2, i8 2>
4282; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT11]] = add <2 x i32> [[STEP_ADD9]], <i32 8, i32 8>
4283; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4284; UNROLL-NO-IC-NEXT:    br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
4285; UNROLL-NO-IC:       middle.block:
4286; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4287; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
4288; UNROLL-NO-IC:       scalar.ph:
4289; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
4290; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
4291; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4292; UNROLL-NO-IC-NEXT:    br label [[LOOP:%.*]]
4293; UNROLL-NO-IC:       loop:
4294; UNROLL-NO-IC-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4295; UNROLL-NO-IC-NEXT:    [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
4296; UNROLL-NO-IC-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
4297; UNROLL-NO-IC-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i8 [[IDX]]
4298; UNROLL-NO-IC-NEXT:    store i32 [[SPHI]], i32* [[PTR]], align 4
4299; UNROLL-NO-IC-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
4300; UNROLL-NO-IC-NEXT:    [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
4301; UNROLL-NO-IC-NEXT:    [[MUL]] = mul i32 [[IDX_INC_EXT]], 4
4302; UNROLL-NO-IC-NEXT:    [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
4303; UNROLL-NO-IC-NEXT:    [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
4304; UNROLL-NO-IC-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
4305; UNROLL-NO-IC:       exit.loopexit:
4306; UNROLL-NO-IC-NEXT:    br label [[EXIT]]
4307; UNROLL-NO-IC:       exit:
4308; UNROLL-NO-IC-NEXT:    ret void
4309;
4310; INTERLEAVE-LABEL: @wrappingindvars2(
4311; INTERLEAVE-NEXT:  entry:
4312; INTERLEAVE-NEXT:    [[EXT:%.*]] = zext i8 [[T:%.*]] to i32
4313; INTERLEAVE-NEXT:    [[EXT_MUL:%.*]] = shl nuw nsw i32 [[EXT]], 2
4314; INTERLEAVE-NEXT:    [[ECMP:%.*]] = icmp ult i8 [[T]], 42
4315; INTERLEAVE-NEXT:    br i1 [[ECMP]], label [[LOOP_PREHEADER:%.*]], label [[EXIT:%.*]]
4316; INTERLEAVE:       loop.preheader:
4317; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add i32 [[LEN:%.*]], 1
4318; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
4319; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
4320; INTERLEAVE:       vector.scevcheck:
4321; INTERLEAVE-NEXT:    [[TMP1:%.*]] = trunc i32 [[LEN]] to i8
4322; INTERLEAVE-NEXT:    [[TMP2:%.*]] = xor i8 [[T]], -1
4323; INTERLEAVE-NEXT:    [[TMP3:%.*]] = icmp ult i8 [[TMP2]], [[TMP1]]
4324; INTERLEAVE-NEXT:    [[TMP4:%.*]] = icmp ugt i32 [[LEN]], 255
4325; INTERLEAVE-NEXT:    [[TMP5:%.*]] = or i1 [[TMP3]], [[TMP4]]
4326; INTERLEAVE-NEXT:    [[TMP6:%.*]] = trunc i32 [[LEN]] to i8
4327; INTERLEAVE-NEXT:    [[TMP7:%.*]] = add i8 [[TMP6]], [[T]]
4328; INTERLEAVE-NEXT:    [[TMP8:%.*]] = icmp slt i8 [[TMP7]], [[T]]
4329; INTERLEAVE-NEXT:    [[TMP9:%.*]] = icmp ugt i32 [[LEN]], 255
4330; INTERLEAVE-NEXT:    [[TMP10:%.*]] = or i1 [[TMP8]], [[TMP9]]
4331; INTERLEAVE-NEXT:    [[TMP11:%.*]] = or i1 [[TMP5]], [[TMP10]]
4332; INTERLEAVE-NEXT:    br i1 [[TMP11]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
4333; INTERLEAVE:       vector.ph:
4334; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP0]], -8
4335; INTERLEAVE-NEXT:    [[CAST_CRD:%.*]] = trunc i32 [[N_VEC]] to i8
4336; INTERLEAVE-NEXT:    [[IND_END:%.*]] = add i8 [[CAST_CRD]], [[T]]
4337; INTERLEAVE-NEXT:    [[TMP12:%.*]] = add i32 [[N_VEC]], [[EXT]]
4338; INTERLEAVE-NEXT:    [[IND_END2:%.*]] = shl i32 [[TMP12]], 2
4339; INTERLEAVE-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <4 x i32> poison, i32 [[EXT_MUL]], i64 0
4340; INTERLEAVE-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT5]], <4 x i32> poison, <4 x i32> zeroinitializer
4341; INTERLEAVE-NEXT:    [[INDUCTION7:%.*]] = add nuw nsw <4 x i32> [[DOTSPLAT6]], <i32 0, i32 4, i32 8, i32 12>
4342; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
4343; INTERLEAVE:       vector.body:
4344; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4345; INTERLEAVE-NEXT:    [[VEC_IND8:%.*]] = phi <4 x i32> [ [[INDUCTION7]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ]
4346; INTERLEAVE-NEXT:    [[TMP13:%.*]] = trunc i32 [[INDEX]] to i8
4347; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = add i8 [[TMP13]], [[T]]
4348; INTERLEAVE-NEXT:    [[STEP_ADD9:%.*]] = add <4 x i32> [[VEC_IND8]], <i32 16, i32 16, i32 16, i32 16>
4349; INTERLEAVE-NEXT:    [[TMP14:%.*]] = sext i8 [[OFFSET_IDX]] to i64
4350; INTERLEAVE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP14]]
4351; INTERLEAVE-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
4352; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND8]], <4 x i32>* [[TMP16]], align 4
4353; INTERLEAVE-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i64 4
4354; INTERLEAVE-NEXT:    [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>*
4355; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD9]], <4 x i32>* [[TMP18]], align 4
4356; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
4357; INTERLEAVE-NEXT:    [[VEC_IND_NEXT11]] = add <4 x i32> [[VEC_IND8]], <i32 32, i32 32, i32 32, i32 32>
4358; INTERLEAVE-NEXT:    [[TMP19:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4359; INTERLEAVE-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP38:![0-9]+]]
4360; INTERLEAVE:       middle.block:
4361; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4362; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[EXIT_LOOPEXIT:%.*]], label [[SCALAR_PH]]
4363; INTERLEAVE:       scalar.ph:
4364; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i8 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[T]], [[LOOP_PREHEADER]] ], [ [[T]], [[VECTOR_SCEVCHECK]] ]
4365; INTERLEAVE-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END2]], [[MIDDLE_BLOCK]] ], [ [[EXT_MUL]], [[LOOP_PREHEADER]] ], [ [[EXT_MUL]], [[VECTOR_SCEVCHECK]] ]
4366; INTERLEAVE-NEXT:    [[BC_RESUME_VAL3:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[LOOP_PREHEADER]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
4367; INTERLEAVE-NEXT:    br label [[LOOP:%.*]]
4368; INTERLEAVE:       loop:
4369; INTERLEAVE-NEXT:    [[IDX:%.*]] = phi i8 [ [[IDX_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4370; INTERLEAVE-NEXT:    [[SPHI:%.*]] = phi i32 [ [[MUL:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
4371; INTERLEAVE-NEXT:    [[IDX_B:%.*]] = phi i32 [ [[IDX_B_INC:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL3]], [[SCALAR_PH]] ]
4372; INTERLEAVE-NEXT:    [[TMP20:%.*]] = sext i8 [[IDX]] to i64
4373; INTERLEAVE-NEXT:    [[PTR:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP20]]
4374; INTERLEAVE-NEXT:    store i32 [[SPHI]], i32* [[PTR]], align 4
4375; INTERLEAVE-NEXT:    [[IDX_INC]] = add i8 [[IDX]], 1
4376; INTERLEAVE-NEXT:    [[IDX_INC_EXT:%.*]] = zext i8 [[IDX_INC]] to i32
4377; INTERLEAVE-NEXT:    [[MUL]] = shl nuw nsw i32 [[IDX_INC_EXT]], 2
4378; INTERLEAVE-NEXT:    [[IDX_B_INC]] = add nuw nsw i32 [[IDX_B]], 1
4379; INTERLEAVE-NEXT:    [[C:%.*]] = icmp ult i32 [[IDX_B]], [[LEN]]
4380; INTERLEAVE-NEXT:    br i1 [[C]], label [[LOOP]], label [[EXIT_LOOPEXIT]], !llvm.loop [[LOOP39:![0-9]+]]
4381; INTERLEAVE:       exit.loopexit:
4382; INTERLEAVE-NEXT:    br label [[EXIT]]
4383; INTERLEAVE:       exit:
4384; INTERLEAVE-NEXT:    ret void
4385;
4386entry:
4387  %st = zext i8 %t to i16
4388  %ext = zext i8 %t to i32
4389  %ext.mul = mul i32 %ext, 4
4390
4391  %ecmp = icmp ult i16 %st, 42
4392  br i1 %ecmp, label %loop, label %exit
4393
4394  loop:
4395
4396  %idx = phi i8 [ %t, %entry ], [ %idx.inc, %loop ]
4397  %sphi = phi i32 [ %ext.mul, %entry ], [%mul, %loop]
4398  %idx.b = phi i32 [ 0, %entry ], [ %idx.b.inc, %loop ]
4399
4400  %ptr = getelementptr inbounds i32, i32* %A, i8 %idx
4401  store i32 %sphi, i32* %ptr
4402
4403  %idx.inc = add i8 %idx, 1
4404  %idx.inc.ext = zext i8 %idx.inc to i32
4405  %mul = mul i32 %idx.inc.ext, 4
4406  %idx.b.inc = add nuw nsw i32 %idx.b, 1
4407
4408  %c = icmp ult i32 %idx.b, %len
4409  br i1 %c, label %loop, label %exit
4410
4411  exit:
4412  ret void
4413}
4414
; Check that, whenever the vectorizer knows how to, it generates the vector IV
; from the pre-header (a vector phi advanced by a vector step) instead of
; widening the scalar IV inside the loop.
4418define void @veciv(i32* nocapture %a, i32 %start, i32 %k) {
4419; CHECK-LABEL: @veciv(
4420; CHECK-NEXT:  for.body.preheader:
4421; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 2
4422; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4423; CHECK:       vector.ph:
4424; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[K]], 2
4425; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[K]], [[N_MOD_VF]]
4426; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
4427; CHECK:       vector.body:
4428; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4429; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4430; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
4431; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
4432; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]]
4433; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
4434; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
4435; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP4]], align 4
4436; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4437; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4438; CHECK-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4439; CHECK-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
4440; CHECK:       middle.block:
4441; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]]
4442; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4443; CHECK:       scalar.ph:
4444; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
4445; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
4446; CHECK:       for.body:
4447; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4448; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
4449; CHECK-NEXT:    store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4450; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4451; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4452; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
4453; CHECK:       exit:
4454; CHECK-NEXT:    ret void
4455;
4456; IND-LABEL: @veciv(
4457; IND-NEXT:  for.body.preheader:
4458; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 2
4459; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4460; IND:       vector.ph:
4461; IND-NEXT:    [[N_VEC:%.*]] = and i32 [[K]], -2
4462; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
4463; IND:       vector.body:
4464; IND-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4465; IND-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4466; IND-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
4467; IND-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
4468; IND-NEXT:    [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <2 x i32>*
4469; IND-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP2]], align 4
4470; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4471; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4472; IND-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4473; IND-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
4474; IND:       middle.block:
4475; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[K]]
4476; IND-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4477; IND:       scalar.ph:
4478; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
4479; IND-NEXT:    br label [[FOR_BODY:%.*]]
4480; IND:       for.body:
4481; IND-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4482; IND-NEXT:    [[TMP4:%.*]] = sext i32 [[INDVARS_IV]] to i64
4483; IND-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
4484; IND-NEXT:    store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4485; IND-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4486; IND-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4487; IND-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
4488; IND:       exit:
4489; IND-NEXT:    ret void
4490;
4491; UNROLL-LABEL: @veciv(
4492; UNROLL-NEXT:  for.body.preheader:
4493; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 4
4494; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4495; UNROLL:       vector.ph:
4496; UNROLL-NEXT:    [[N_VEC:%.*]] = and i32 [[K]], -4
4497; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
4498; UNROLL:       vector.body:
4499; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4500; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4501; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4502; UNROLL-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
4503; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
4504; UNROLL-NEXT:    [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <2 x i32>*
4505; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP2]], align 4
4506; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 2
4507; UNROLL-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
4508; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP4]], align 4
4509; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
4510; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
4511; UNROLL-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4512; UNROLL-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
4513; UNROLL:       middle.block:
4514; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[K]]
4515; UNROLL-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4516; UNROLL:       scalar.ph:
4517; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
4518; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
4519; UNROLL:       for.body:
4520; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4521; UNROLL-NEXT:    [[TMP6:%.*]] = sext i32 [[INDVARS_IV]] to i64
4522; UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
4523; UNROLL-NEXT:    store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4524; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4525; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4526; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
4527; UNROLL:       exit:
4528; UNROLL-NEXT:    ret void
4529;
4530; UNROLL-NO-IC-LABEL: @veciv(
4531; UNROLL-NO-IC-NEXT:  for.body.preheader:
4532; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 4
4533; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4534; UNROLL-NO-IC:       vector.ph:
4535; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[K]], 4
4536; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[K]], [[N_MOD_VF]]
4537; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
4538; UNROLL-NO-IC:       vector.body:
4539; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4540; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4541; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4542; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[INDEX]], 0
4543; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i32 [[INDEX]], 1
4544; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i32 [[INDEX]], 2
4545; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i32 [[INDEX]], 3
4546; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP0]]
4547; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP2]]
4548; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
4549; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>*
4550; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP7]], align 4
4551; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 2
4552; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <2 x i32>*
4553; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP9]], align 4
4554; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
4555; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
4556; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4557; UNROLL-NO-IC-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
4558; UNROLL-NO-IC:       middle.block:
4559; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[K]], [[N_VEC]]
4560; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4561; UNROLL-NO-IC:       scalar.ph:
4562; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
4563; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
4564; UNROLL-NO-IC:       for.body:
4565; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4566; UNROLL-NO-IC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
4567; UNROLL-NO-IC-NEXT:    store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4568; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4569; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4570; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
4571; UNROLL-NO-IC:       exit:
4572; UNROLL-NO-IC-NEXT:    ret void
4573;
4574; INTERLEAVE-LABEL: @veciv(
4575; INTERLEAVE-NEXT:  for.body.preheader:
4576; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[K:%.*]], 8
4577; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4578; INTERLEAVE:       vector.ph:
4579; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i32 [[K]], -8
4580; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
4581; INTERLEAVE:       vector.body:
4582; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4583; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4584; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
4585; INTERLEAVE-NEXT:    [[TMP0:%.*]] = sext i32 [[INDEX]] to i64
4586; INTERLEAVE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
4587; INTERLEAVE-NEXT:    [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
4588; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP2]], align 4
4589; INTERLEAVE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 4
4590; INTERLEAVE-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
4591; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP4]], align 4
4592; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
4593; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
4594; INTERLEAVE-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4595; INTERLEAVE-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
4596; INTERLEAVE:       middle.block:
4597; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[N_VEC]], [[K]]
4598; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4599; INTERLEAVE:       scalar.ph:
4600; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ]
4601; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
4602; INTERLEAVE:       for.body:
4603; INTERLEAVE-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4604; INTERLEAVE-NEXT:    [[TMP6:%.*]] = sext i32 [[INDVARS_IV]] to i64
4605; INTERLEAVE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
4606; INTERLEAVE-NEXT:    store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4607; INTERLEAVE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4608; INTERLEAVE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4609; INTERLEAVE-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
4610; INTERLEAVE:       exit:
4611; INTERLEAVE-NEXT:    ret void
4612;
4613for.body.preheader:
4614  br label %for.body
4615
4616for.body:
4617  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
4618  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
4619  store i32 %indvars.iv, i32* %arrayidx, align 4
4620  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
4621  %exitcond = icmp eq i32 %indvars.iv.next, %k
4622  br i1 %exitcond, label %exit, label %for.body
4623
4624exit:
4625  ret void
4626}
4627
; Make sure that a truncated induction variable gets its own narrow vector
; induction. The i64 counter %indvars.iv starts at 0 and is incremented by 1
; until the incremented value equals %k. Apart from that increment it is used
; only through %trunc.iv, its i32 truncation, which serves as both the index
; into %a and the value stored there. The assertions below expect a <2 x i32>
; (or <4 x i32> at vector width 4) induction phi for the truncated value, and
; expect the vector loop to be entered only after a vector.scevcheck block has
; tested %k - 1 at run time. The %start argument is not referenced by the loop.
define void @trunciv(i32* nocapture %a, i32 %start, i64 %k) {
; CHECK-LABEL: @trunciv(
; CHECK-NEXT:  for.body.preheader:
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; CHECK:       vector.scevcheck:
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[K]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 0, [[TMP1]]
; CHECK-NEXT:    [[TMP3:%.*]] = sub i32 0, [[TMP1]]
; CHECK-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
; CHECK-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0
; CHECK-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; CHECK-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; CHECK-NEXT:    br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[K]], 2
; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]]
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP10:%.*]] = trunc i64 [[INDEX]] to i32
; CHECK-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], 0
; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], 1
; CHECK-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP11]]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 0
; CHECK-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <2 x i32>*
; CHECK-NEXT:    store <2 x i32> [[VEC_IND1]], <2 x i32>* [[TMP15]], align 4
; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i32> [[VEC_IND1]], <i32 2, i32 2>
; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; CHECK:       middle.block:
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TRUNC_IV]]
; CHECK-NEXT:    store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; CHECK:       exit:
; CHECK-NEXT:    ret void
;
; IND-LABEL: @trunciv(
; IND-NEXT:  for.body.preheader:
; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 2
; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; IND:       vector.scevcheck:
; IND-NEXT:    [[TMP0:%.*]] = add i64 [[K]], -1
; IND-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 2147483648
; IND-NEXT:    br i1 [[TMP1]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
; IND:       vector.ph:
; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], -2
; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
; IND:       vector.body:
; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; IND-NEXT:    [[VEC_IND1:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[VECTOR_BODY]] ]
; IND-NEXT:    [[SEXT:%.*]] = shl i64 [[INDEX]], 32
; IND-NEXT:    [[TMP2:%.*]] = ashr exact i64 [[SEXT]], 32
; IND-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP2]]
; IND-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
; IND-NEXT:    store <2 x i32> [[VEC_IND1]], <2 x i32>* [[TMP4]], align 4
; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
; IND-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i32> [[VEC_IND1]], <i32 2, i32 2>
; IND-NEXT:    [[TMP5:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; IND-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; IND:       middle.block:
; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]]
; IND-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; IND:       scalar.ph:
; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; IND-NEXT:    br label [[FOR_BODY:%.*]]
; IND:       for.body:
; IND-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; IND-NEXT:    [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; IND-NEXT:    [[SEXT3:%.*]] = shl i64 [[INDVARS_IV]], 32
; IND-NEXT:    [[TMP6:%.*]] = ashr exact i64 [[SEXT3]], 32
; IND-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
; IND-NEXT:    store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4
; IND-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; IND-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
; IND-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; IND:       exit:
; IND-NEXT:    ret void
;
; UNROLL-LABEL: @trunciv(
; UNROLL-NEXT:  for.body.preheader:
; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4
; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; UNROLL:       vector.scevcheck:
; UNROLL-NEXT:    [[TMP0:%.*]] = add i64 [[K]], -1
; UNROLL-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 2147483648
; UNROLL-NEXT:    br i1 [[TMP1]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
; UNROLL:       vector.ph:
; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], -4
; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
; UNROLL:       vector.body:
; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NEXT:    [[STEP_ADD3:%.*]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2>
; UNROLL-NEXT:    [[SEXT:%.*]] = shl i64 [[INDEX]], 32
; UNROLL-NEXT:    [[TMP2:%.*]] = ashr exact i64 [[SEXT]], 32
; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP2]]
; UNROLL-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <2 x i32>*
; UNROLL-NEXT:    store <2 x i32> [[VEC_IND2]], <2 x i32>* [[TMP4]], align 4
; UNROLL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 2
; UNROLL-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <2 x i32>*
; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD3]], <2 x i32>* [[TMP6]], align 4
; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NEXT:    [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND2]], <i32 4, i32 4>
; UNROLL-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; UNROLL:       middle.block:
; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]]
; UNROLL-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL:       scalar.ph:
; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
; UNROLL:       for.body:
; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NEXT:    [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; UNROLL-NEXT:    [[SEXT6:%.*]] = shl i64 [[INDVARS_IV]], 32
; UNROLL-NEXT:    [[TMP8:%.*]] = ashr exact i64 [[SEXT6]], 32
; UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]]
; UNROLL-NEXT:    store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4
; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; UNROLL:       exit:
; UNROLL-NEXT:    ret void
;
; UNROLL-NO-IC-LABEL: @trunciv(
; UNROLL-NO-IC-NEXT:  for.body.preheader:
; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 4
; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; UNROLL-NO-IC:       vector.scevcheck:
; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[K]], -1
; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = trunc i64 [[TMP0]] to i32
; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i32 0, [[TMP1]]
; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = sub i32 0, [[TMP1]]
; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = icmp sgt i32 [[TMP3]], 0
; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = icmp slt i32 [[TMP2]], 0
; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = select i1 false, i1 [[TMP4]], i1 [[TMP5]]
; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = icmp ugt i64 [[TMP0]], 4294967295
; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = or i1 [[TMP6]], [[TMP7]]
; UNROLL-NO-IC-NEXT:    br i1 [[TMP8]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
; UNROLL-NO-IC:       vector.ph:
; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[K]], 4
; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[K]], [[N_MOD_VF]]
; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
; UNROLL-NO-IC:       vector.body:
; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
; UNROLL-NO-IC-NEXT:    [[STEP_ADD3:%.*]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2>
; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = trunc i64 [[INDEX]] to i32
; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = add i32 [[TMP10]], 0
; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = add i32 [[TMP10]], 1
; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = add i32 [[TMP10]], 2
; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = add i32 [[TMP10]], 3
; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP11]]
; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP13]]
; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0
; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <2 x i32>*
; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND2]], <2 x i32>* [[TMP18]], align 4
; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 2
; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <2 x i32>*
; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD3]], <2 x i32>* [[TMP20]], align 4
; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT5]] = add <2 x i32> [[STEP_ADD3]], <i32 2, i32 2>
; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; UNROLL-NO-IC-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; UNROLL-NO-IC:       middle.block:
; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[K]], [[N_VEC]]
; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; UNROLL-NO-IC:       scalar.ph:
; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
; UNROLL-NO-IC:       for.body:
; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; UNROLL-NO-IC-NEXT:    [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; UNROLL-NO-IC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TRUNC_IV]]
; UNROLL-NO-IC-NEXT:    store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4
; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; UNROLL-NO-IC:       exit:
; UNROLL-NO-IC-NEXT:    ret void
;
; INTERLEAVE-LABEL: @trunciv(
; INTERLEAVE-NEXT:  for.body.preheader:
; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[K:%.*]], 8
; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
; INTERLEAVE:       vector.scevcheck:
; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add i64 [[K]], -1
; INTERLEAVE-NEXT:    [[TMP1:%.*]] = icmp ult i64 [[TMP0]], 2147483648
; INTERLEAVE-NEXT:    br i1 [[TMP1]], label [[VECTOR_PH:%.*]], label [[SCALAR_PH]]
; INTERLEAVE:       vector.ph:
; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[K]], -8
; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
; INTERLEAVE:       vector.body:
; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT:    [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
; INTERLEAVE-NEXT:    [[STEP_ADD3:%.*]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
; INTERLEAVE-NEXT:    [[SEXT:%.*]] = shl i64 [[INDEX]], 32
; INTERLEAVE-NEXT:    [[TMP2:%.*]] = ashr exact i64 [[SEXT]], 32
; INTERLEAVE-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP2]]
; INTERLEAVE-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND2]], <4 x i32>* [[TMP4]], align 4
; INTERLEAVE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 4
; INTERLEAVE-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD3]], <4 x i32>* [[TMP6]], align 4
; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
; INTERLEAVE-NEXT:    [[VEC_IND_NEXT5]] = add <4 x i32> [[VEC_IND2]], <i32 8, i32 8, i32 8, i32 8>
; INTERLEAVE-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; INTERLEAVE-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
; INTERLEAVE:       middle.block:
; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[K]]
; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
; INTERLEAVE:       scalar.ph:
; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER:%.*]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
; INTERLEAVE:       for.body:
; INTERLEAVE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; INTERLEAVE-NEXT:    [[TRUNC_IV:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; INTERLEAVE-NEXT:    [[SEXT6:%.*]] = shl i64 [[INDVARS_IV]], 32
; INTERLEAVE-NEXT:    [[TMP8:%.*]] = ashr exact i64 [[SEXT6]], 32
; INTERLEAVE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]]
; INTERLEAVE-NEXT:    store i32 [[TRUNC_IV]], i32* [[ARRAYIDX]], align 4
; INTERLEAVE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
; INTERLEAVE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[K]]
; INTERLEAVE-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP43:![0-9]+]]
; INTERLEAVE:       exit:
; INTERLEAVE-NEXT:    ret void
;
for.body.preheader:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  ; The narrowed counter is the only form of the induction variable that
  ; reaches memory: it is both the element index and the value stored.
  %trunc.iv = trunc i64 %indvars.iv to i32
  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %trunc.iv
  store i32 %trunc.iv, i32* %arrayidx, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %k
  br i1 %exitcond, label %exit, label %for.body

exit:
  ret void
}
4890
4891;
4892;
4893define void @nonprimary(i32* nocapture %a, i32 %start, i32 %i, i32 %k) {
4894; CHECK-LABEL: @nonprimary(
4895; CHECK-NEXT:  for.body.preheader:
4896; CHECK-NEXT:    [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]]
4897; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
4898; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4899; CHECK:       vector.ph:
4900; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 2
4901; CHECK-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
4902; CHECK-NEXT:    [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]]
4903; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i32 0
4904; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
4905; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
4906; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
4907; CHECK:       vector.body:
4908; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4909; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4910; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]]
4911; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
4912; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1
4913; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP1]]
4914; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i32 0
4915; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
4916; CHECK-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP5]], align 4
4917; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4918; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4919; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4920; CHECK-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
4921; CHECK:       middle.block:
4922; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4923; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4924; CHECK:       scalar.ph:
4925; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ]
4926; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
4927; CHECK:       for.body:
4928; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4929; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
4930; CHECK-NEXT:    store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4931; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4932; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4933; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
4934; CHECK:       exit:
4935; CHECK-NEXT:    ret void
4936;
4937; IND-LABEL: @nonprimary(
4938; IND-NEXT:  for.body.preheader:
4939; IND-NEXT:    [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]]
4940; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 2
4941; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4942; IND:       vector.ph:
4943; IND-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP0]], -2
4944; IND-NEXT:    [[IND_END:%.*]] = add i32 [[N_VEC]], [[I]]
4945; IND-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0
4946; IND-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
4947; IND-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
4948; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
4949; IND:       vector.body:
4950; IND-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4951; IND-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4952; IND-NEXT:    [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], [[I]]
4953; IND-NEXT:    [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64
4954; IND-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
4955; IND-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
4956; IND-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4
4957; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4958; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4959; IND-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4960; IND-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
4961; IND:       middle.block:
4962; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
4963; IND-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
4964; IND:       scalar.ph:
4965; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ]
4966; IND-NEXT:    br label [[FOR_BODY:%.*]]
4967; IND:       for.body:
4968; IND-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4969; IND-NEXT:    [[TMP5:%.*]] = sext i32 [[INDVARS_IV]] to i64
4970; IND-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]]
4971; IND-NEXT:    store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
4972; IND-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4973; IND-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
4974; IND-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
4975; IND:       exit:
4976; IND-NEXT:    ret void
4977;
4978; UNROLL-LABEL: @nonprimary(
4979; UNROLL-NEXT:  for.body.preheader:
4980; UNROLL-NEXT:    [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]]
4981; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
4982; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4983; UNROLL:       vector.ph:
4984; UNROLL-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP0]], -4
4985; UNROLL-NEXT:    [[IND_END:%.*]] = add i32 [[N_VEC]], [[I]]
4986; UNROLL-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i64 0
4987; UNROLL-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
4988; UNROLL-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
4989; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
4990; UNROLL:       vector.body:
4991; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4992; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4993; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
4994; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], [[I]]
4995; UNROLL-NEXT:    [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64
4996; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
4997; UNROLL-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
4998; UNROLL-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP3]], align 4
4999; UNROLL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 2
5000; UNROLL-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
5001; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP5]], align 4
5002; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
5003; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 4, i32 4>
5004; UNROLL-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5005; UNROLL-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
5006; UNROLL:       middle.block:
5007; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
5008; UNROLL-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
5009; UNROLL:       scalar.ph:
5010; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ]
5011; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
5012; UNROLL:       for.body:
5013; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5014; UNROLL-NEXT:    [[TMP7:%.*]] = sext i32 [[INDVARS_IV]] to i64
5015; UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
5016; UNROLL-NEXT:    store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
5017; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
5018; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
5019; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
5020; UNROLL:       exit:
5021; UNROLL-NEXT:    ret void
5022;
5023; UNROLL-NO-IC-LABEL: @nonprimary(
5024; UNROLL-NO-IC-NEXT:  for.body.preheader:
5025; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]]
5026; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 4
5027; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5028; UNROLL-NO-IC:       vector.ph:
5029; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[TMP0]], 4
5030; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[TMP0]], [[N_MOD_VF]]
5031; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i32 [[I]], [[N_VEC]]
5032; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[I]], i32 0
5033; UNROLL-NO-IC-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
5034; UNROLL-NO-IC-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
5035; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
5036; UNROLL-NO-IC:       vector.body:
5037; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5038; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5039; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
5040; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i32 [[I]], [[INDEX]]
5041; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 0
5042; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 1
5043; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], 2
5044; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 3
5045; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i32 [[TMP1]]
5046; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[TMP3]]
5047; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
5048; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <2 x i32>*
5049; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND]], <2 x i32>* [[TMP8]], align 4
5050; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 2
5051; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <2 x i32>*
5052; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD]], <2 x i32>* [[TMP10]], align 4
5053; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
5054; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
5055; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5056; UNROLL-NO-IC-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
5057; UNROLL-NO-IC:       middle.block:
5058; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
5059; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
5060; UNROLL-NO-IC:       scalar.ph:
5061; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ]
5062; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
5063; UNROLL-NO-IC:       for.body:
5064; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5065; UNROLL-NO-IC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i32 [[INDVARS_IV]]
5066; UNROLL-NO-IC-NEXT:    store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
5067; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
5068; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
5069; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
5070; UNROLL-NO-IC:       exit:
5071; UNROLL-NO-IC-NEXT:    ret void
5072;
5073; INTERLEAVE-LABEL: @nonprimary(
5074; INTERLEAVE-NEXT:  for.body.preheader:
5075; INTERLEAVE-NEXT:    [[TMP0:%.*]] = sub i32 [[K:%.*]], [[I:%.*]]
5076; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 8
5077; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5078; INTERLEAVE:       vector.ph:
5079; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i32 [[TMP0]], -8
5080; INTERLEAVE-NEXT:    [[IND_END:%.*]] = add i32 [[N_VEC]], [[I]]
5081; INTERLEAVE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[I]], i64 0
5082; INTERLEAVE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
5083; INTERLEAVE-NEXT:    [[INDUCTION:%.*]] = add <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
5084; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
5085; INTERLEAVE:       vector.body:
5086; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5087; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5088; INTERLEAVE-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
5089; INTERLEAVE-NEXT:    [[OFFSET_IDX:%.*]] = add i32 [[INDEX]], [[I]]
5090; INTERLEAVE-NEXT:    [[TMP1:%.*]] = sext i32 [[OFFSET_IDX]] to i64
5091; INTERLEAVE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP1]]
5092; INTERLEAVE-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
5093; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND]], <4 x i32>* [[TMP3]], align 4
5094; INTERLEAVE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 4
5095; INTERLEAVE-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
5096; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD]], <4 x i32>* [[TMP5]], align 4
5097; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
5098; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
5099; INTERLEAVE-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5100; INTERLEAVE-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP44:![0-9]+]]
5101; INTERLEAVE:       middle.block:
5102; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i32 [[TMP0]], [[N_VEC]]
5103; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
5104; INTERLEAVE:       scalar.ph:
5105; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[I]], [[FOR_BODY_PREHEADER:%.*]] ]
5106; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
5107; INTERLEAVE:       for.body:
5108; INTERLEAVE-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5109; INTERLEAVE-NEXT:    [[TMP7:%.*]] = sext i32 [[INDVARS_IV]] to i64
5110; INTERLEAVE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
5111; INTERLEAVE-NEXT:    store i32 [[INDVARS_IV]], i32* [[ARRAYIDX]], align 4
5112; INTERLEAVE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
5113; INTERLEAVE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[K]]
5114; INTERLEAVE-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP45:![0-9]+]]
5115; INTERLEAVE:       exit:
5116; INTERLEAVE-NEXT:    ret void
5117;
5118for.body.preheader:
5119  br label %for.body
5120
5121for.body:
5122  %indvars.iv = phi i32 [ %indvars.iv.next, %for.body ], [ %i, %for.body.preheader ]
5123  %arrayidx = getelementptr inbounds i32, i32* %a, i32 %indvars.iv
5124  store i32 %indvars.iv, i32* %arrayidx, align 4
5125  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
5126  %exitcond = icmp eq i32 %indvars.iv.next, %k
5127  br i1 %exitcond, label %exit, label %for.body
5128
5129exit:
5130  ret void
5131}
5132
; Loop with two integer inductions: %i (0, 1, 2, ...) indexes %a, while %j
; (0, 2, 4, ...) is only consumed through a trunc to i32 that is then stored.
; The loop is bottom-tested, so the body executes once even when %n <= 0; the
; trip count in the assertions below is accordingly smax(%n, 1).
; The assertions expect the truncated %j to become an i32 vector induction
; starting at <0, 2, ...> and advancing by 2 * VF per vector part. The runs
; without -instcombine additionally show an otherwise unused i64 vector phi
; for %j; the runs with -instcombine only show the i32 one.
5133define void @non_primary_iv_trunc(i32* %a, i64 %n) {
5134; CHECK-LABEL: @non_primary_iv_trunc(
5135; CHECK-NEXT:  entry:
5136; CHECK-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
5137; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
5138; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5139; CHECK:       vector.ph:
5140; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 2
5141; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
5142; CHECK-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
5143; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
5144; CHECK:       vector.body:
5145; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5146; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5147; CHECK-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
5148; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
5149; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
5150; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 0
5151; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
5152; CHECK-NEXT:    store <2 x i32> [[VEC_IND2]], <2 x i32>* [[TMP3]], align 4
5153; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
5154; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
5155; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 4, i32 4>
5156; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
5157; CHECK-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
5158; CHECK:       middle.block:
5159; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
5160; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5161; CHECK:       scalar.ph:
5162; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
5163; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5164; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
5165; CHECK:       for.body:
5166; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5167; CHECK-NEXT:    [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5168; CHECK-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]]
5169; CHECK-NEXT:    [[VAR1:%.*]] = trunc i64 [[J]] to i32
5170; CHECK-NEXT:    store i32 [[VAR1]], i32* [[VAR0]], align 4
5171; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
5172; CHECK-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J]], 2
5173; CHECK-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
5174; CHECK-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
5175; CHECK:       for.end:
5176; CHECK-NEXT:    ret void
5177;
5178; IND-LABEL: @non_primary_iv_trunc(
5179; IND-NEXT:  entry:
5180; IND-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
5181; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
5182; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5183; IND:       vector.ph:
5184; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
5185; IND-NEXT:    [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
5186; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
5187; IND:       vector.body:
5188; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5189; IND-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
5190; IND-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
5191; IND-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <2 x i32>*
5192; IND-NEXT:    store <2 x i32> [[VEC_IND2]], <2 x i32>* [[TMP1]], align 4
5193; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
5194; IND-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 4, i32 4>
5195; IND-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
5196; IND-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
5197; IND:       middle.block:
5198; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
5199; IND-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5200; IND:       scalar.ph:
5201; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
5202; IND-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5203; IND-NEXT:    br label [[FOR_BODY:%.*]]
5204; IND:       for.body:
5205; IND-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5206; IND-NEXT:    [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5207; IND-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]]
5208; IND-NEXT:    [[VAR1:%.*]] = trunc i64 [[J]] to i32
5209; IND-NEXT:    store i32 [[VAR1]], i32* [[VAR0]], align 4
5210; IND-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
5211; IND-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J]], 2
5212; IND-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
5213; IND-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
5214; IND:       for.end:
5215; IND-NEXT:    ret void
5216;
5217; UNROLL-LABEL: @non_primary_iv_trunc(
5218; UNROLL-NEXT:  entry:
5219; UNROLL-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
5220; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
5221; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5222; UNROLL:       vector.ph:
5223; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
5224; UNROLL-NEXT:    [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
5225; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
5226; UNROLL:       vector.body:
5227; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5228; UNROLL-NEXT:    [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
5229; UNROLL-NEXT:    [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND3]], <i32 4, i32 4>
5230; UNROLL-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
5231; UNROLL-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <2 x i32>*
5232; UNROLL-NEXT:    store <2 x i32> [[VEC_IND3]], <2 x i32>* [[TMP1]], align 4
5233; UNROLL-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 2
5234; UNROLL-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <2 x i32>*
5235; UNROLL-NEXT:    store <2 x i32> [[STEP_ADD4]], <2 x i32>* [[TMP3]], align 4
5236; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
5237; UNROLL-NEXT:    [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND3]], <i32 8, i32 8>
5238; UNROLL-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
5239; UNROLL-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
5240; UNROLL:       middle.block:
5241; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
5242; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5243; UNROLL:       scalar.ph:
5244; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
5245; UNROLL-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5246; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
5247; UNROLL:       for.body:
5248; UNROLL-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5249; UNROLL-NEXT:    [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5250; UNROLL-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]]
5251; UNROLL-NEXT:    [[VAR1:%.*]] = trunc i64 [[J]] to i32
5252; UNROLL-NEXT:    store i32 [[VAR1]], i32* [[VAR0]], align 4
5253; UNROLL-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
5254; UNROLL-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J]], 2
5255; UNROLL-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
5256; UNROLL-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
5257; UNROLL:       for.end:
5258; UNROLL-NEXT:    ret void
5259;
5260; UNROLL-NO-IC-LABEL: @non_primary_iv_trunc(
5261; UNROLL-NO-IC-NEXT:  entry:
5262; UNROLL-NO-IC-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
5263; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
5264; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5265; UNROLL-NO-IC:       vector.ph:
5266; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[SMAX]], 4
5267; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[SMAX]], [[N_MOD_VF]]
5268; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
5269; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
5270; UNROLL-NO-IC:       vector.body:
5271; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5272; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5273; UNROLL-NO-IC-NEXT:    [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 0, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
5274; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
5275; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 2
5276; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 4, i64 4>
5277; UNROLL-NO-IC-NEXT:    [[STEP_ADD4:%.*]] = add <2 x i32> [[VEC_IND3]], <i32 4, i32 4>
5278; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP0]]
5279; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]]
5280; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 0
5281; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
5282; UNROLL-NO-IC-NEXT:    store <2 x i32> [[VEC_IND3]], <2 x i32>* [[TMP5]], align 4
5283; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 2
5284; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>*
5285; UNROLL-NO-IC-NEXT:    store <2 x i32> [[STEP_ADD4]], <2 x i32>* [[TMP7]], align 4
5286; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
5287; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 4, i64 4>
5288; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD4]], <i32 4, i32 4>
5289; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
5290; UNROLL-NO-IC-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
5291; UNROLL-NO-IC:       middle.block:
5292; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
5293; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5294; UNROLL-NO-IC:       scalar.ph:
5295; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
5296; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5297; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
5298; UNROLL-NO-IC:       for.body:
5299; UNROLL-NO-IC-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5300; UNROLL-NO-IC-NEXT:    [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5301; UNROLL-NO-IC-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]]
5302; UNROLL-NO-IC-NEXT:    [[VAR1:%.*]] = trunc i64 [[J]] to i32
5303; UNROLL-NO-IC-NEXT:    store i32 [[VAR1]], i32* [[VAR0]], align 4
5304; UNROLL-NO-IC-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
5305; UNROLL-NO-IC-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J]], 2
5306; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
5307; UNROLL-NO-IC-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
5308; UNROLL-NO-IC:       for.end:
5309; UNROLL-NO-IC-NEXT:    ret void
5310;
5311; INTERLEAVE-LABEL: @non_primary_iv_trunc(
5312; INTERLEAVE-NEXT:  entry:
5313; INTERLEAVE-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
5314; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8
5315; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5316; INTERLEAVE:       vector.ph:
5317; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775800
5318; INTERLEAVE-NEXT:    [[IND_END:%.*]] = shl nuw i64 [[N_VEC]], 1
5319; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
5320; INTERLEAVE:       vector.body:
5321; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5322; INTERLEAVE-NEXT:    [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 0, i32 2, i32 4, i32 6>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
5323; INTERLEAVE-NEXT:    [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], <i32 8, i32 8, i32 8, i32 8>
5324; INTERLEAVE-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
5325; INTERLEAVE-NEXT:    [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>*
5326; INTERLEAVE-NEXT:    store <4 x i32> [[VEC_IND3]], <4 x i32>* [[TMP1]], align 4
5327; INTERLEAVE-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i32, i32* [[TMP0]], i64 4
5328; INTERLEAVE-NEXT:    [[TMP3:%.*]] = bitcast i32* [[TMP2]] to <4 x i32>*
5329; INTERLEAVE-NEXT:    store <4 x i32> [[STEP_ADD4]], <4 x i32>* [[TMP3]], align 4
5330; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
5331; INTERLEAVE-NEXT:    [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND3]], <i32 16, i32 16, i32 16, i32 16>
5332; INTERLEAVE-NEXT:    [[TMP4:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
5333; INTERLEAVE-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP46:![0-9]+]]
5334; INTERLEAVE:       middle.block:
5335; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
5336; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5337; INTERLEAVE:       scalar.ph:
5338; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
5339; INTERLEAVE-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5340; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
5341; INTERLEAVE:       for.body:
5342; INTERLEAVE-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5343; INTERLEAVE-NEXT:    [[J:%.*]] = phi i64 [ [[J_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5344; INTERLEAVE-NEXT:    [[VAR0:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I]]
5345; INTERLEAVE-NEXT:    [[VAR1:%.*]] = trunc i64 [[J]] to i32
5346; INTERLEAVE-NEXT:    store i32 [[VAR1]], i32* [[VAR0]], align 4
5347; INTERLEAVE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
5348; INTERLEAVE-NEXT:    [[J_NEXT]] = add nuw nsw i64 [[J]], 2
5349; INTERLEAVE-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
5350; INTERLEAVE-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP47:![0-9]+]]
5351; INTERLEAVE:       for.end:
5352; INTERLEAVE-NEXT:    ret void
5353;
5354entry:
5355  br label %for.body
5356
5357for.body:
  ; %i is the element index into %a and advances by 1 per iteration.
5358  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]
  ; %j is a second i64 induction advancing by 2; its only non-increment use
  ; is the trunc below.
5359  %j = phi i64 [ %j.next, %for.body ], [ 0, %entry ]
5360  %var0 = getelementptr inbounds i32, i32* %a, i64 %i
  ; The truncated value of %j is what gets stored to a[%i].
5361  %var1 = trunc i64 %j to i32
5362  store i32 %var1, i32* %var0, align 4
5363  %i.next = add nuw nsw i64 %i, 1
5364  %j.next = add nuw nsw i64 %j, 2
  ; Signed latch test: keep looping while %i.next < %n.
5365  %cond = icmp slt i64 %i.next, %n
5366  br i1 %cond, label %for.body, label %for.end
5367
5368for.end:
5369  ret void
5370}
5371
5372; PR32419. Ensure we transform truncated non-primary induction variables. In
5373; the test case below we replace %var1 with a new induction variable. Because
5374; the truncated value is non-primary, we must compute an offset from the
5375; primary induction variable.
5376;
5377;
5378define i32 @PR32419(i32 %a, i16 %b) {
5379; CHECK-LABEL: @PR32419(
5380; CHECK-NEXT:  entry:
5381; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5382; CHECK:       vector.ph:
5383; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[A:%.*]], i32 0
5384; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
5385; CHECK:       vector.body:
5386; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE4:%.*]] ]
5387; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 -20, i32 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE4]] ]
5388; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_UREM_CONTINUE4]] ]
5389; CHECK-NEXT:    [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_UREM_CONTINUE4]] ]
5390; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]]
5391; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
5392; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND1]], zeroinitializer
5393; CHECK-NEXT:    [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true>
5394; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i32 0
5395; CHECK-NEXT:    br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
5396; CHECK:       pred.urem.if:
5397; CHECK-NEXT:    [[TMP5:%.*]] = add i16 [[TMP1]], 0
5398; CHECK-NEXT:    [[TMP6:%.*]] = urem i16 [[B:%.*]], [[TMP5]]
5399; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i32 0
5400; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE]]
5401; CHECK:       pred.urem.continue:
5402; CHECK-NEXT:    [[TMP8:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UREM_IF]] ]
5403; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i32 1
5404; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_UREM_IF3:%.*]], label [[PRED_UREM_CONTINUE4]]
5405; CHECK:       pred.urem.if3:
5406; CHECK-NEXT:    [[TMP10:%.*]] = add i16 [[TMP1]], 1
5407; CHECK-NEXT:    [[TMP11:%.*]] = urem i16 [[B]], [[TMP10]]
5408; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP11]], i32 1
5409; CHECK-NEXT:    br label [[PRED_UREM_CONTINUE4]]
5410; CHECK:       pred.urem.continue4:
5411; CHECK-NEXT:    [[TMP13:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_UREM_CONTINUE]] ], [ [[TMP12]], [[PRED_UREM_IF3]] ]
5412; CHECK-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP13]]
5413; CHECK-NEXT:    [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32>
5414; CHECK-NEXT:    [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]]
5415; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
5416; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
5417; CHECK-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], <i16 2, i16 2>
5418; CHECK-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
5419; CHECK-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
5420; CHECK:       middle.block:
5421; CHECK-NEXT:    [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP15]])
5422; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i32 20, 20
5423; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5424; CHECK:       scalar.ph:
5425; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ -20, [[ENTRY:%.*]] ]
5426; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[A]], [[ENTRY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
5427; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
5428; CHECK:       for.body:
5429; CHECK-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ]
5430; CHECK-NEXT:    [[VAR0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ]
5431; CHECK-NEXT:    [[VAR1:%.*]] = trunc i32 [[I]] to i16
5432; CHECK-NEXT:    [[VAR2:%.*]] = icmp eq i16 [[VAR1]], 0
5433; CHECK-NEXT:    br i1 [[VAR2]], label [[FOR_INC]], label [[FOR_COND:%.*]]
5434; CHECK:       for.cond:
5435; CHECK-NEXT:    [[VAR3:%.*]] = urem i16 [[B]], [[VAR1]]
5436; CHECK-NEXT:    br label [[FOR_INC]]
5437; CHECK:       for.inc:
5438; CHECK-NEXT:    [[VAR4:%.*]] = phi i16 [ [[VAR3]], [[FOR_COND]] ], [ 0, [[FOR_BODY]] ]
5439; CHECK-NEXT:    [[VAR5:%.*]] = sext i16 [[VAR4]] to i32
5440; CHECK-NEXT:    [[VAR6]] = or i32 [[VAR0]], [[VAR5]]
5441; CHECK-NEXT:    [[I_NEXT]] = add nsw i32 [[I]], 1
5442; CHECK-NEXT:    [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0
5443; CHECK-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
5444; CHECK:       for.end:
5445; CHECK-NEXT:    [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
5446; CHECK-NEXT:    ret i32 [[VAR7]]
5447;
5448; IND-LABEL: @PR32419(
5449; IND-NEXT:  entry:
5450; IND-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5451; IND:       vector.ph:
5452; IND-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[A:%.*]], i64 0
5453; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
5454; IND:       vector.body:
5455; IND-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE4:%.*]] ]
5456; IND-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[PRED_UREM_CONTINUE4]] ]
5457; IND-NEXT:    [[VEC_IND1:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT2:%.*]], [[PRED_UREM_CONTINUE4]] ]
5458; IND-NEXT:    [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16
5459; IND-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND1]], zeroinitializer
5460; IND-NEXT:    [[TMP3:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true>
5461; IND-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i64 0
5462; IND-NEXT:    br i1 [[TMP4]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
5463; IND:       pred.urem.if:
5464; IND-NEXT:    [[TMP5:%.*]] = add i16 [[TMP1]], -20
5465; IND-NEXT:    [[TMP6:%.*]] = urem i16 [[B:%.*]], [[TMP5]]
5466; IND-NEXT:    [[TMP7:%.*]] = insertelement <2 x i16> poison, i16 [[TMP6]], i64 0
5467; IND-NEXT:    br label [[PRED_UREM_CONTINUE]]
5468; IND:       pred.urem.continue:
5469; IND-NEXT:    [[TMP8:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UREM_IF]] ]
5470; IND-NEXT:    [[TMP9:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1
5471; IND-NEXT:    br i1 [[TMP9]], label [[PRED_UREM_IF3:%.*]], label [[PRED_UREM_CONTINUE4]]
5472; IND:       pred.urem.if3:
5473; IND-NEXT:    [[TMP10:%.*]] = add i16 [[TMP1]], -19
5474; IND-NEXT:    [[TMP11:%.*]] = urem i16 [[B]], [[TMP10]]
5475; IND-NEXT:    [[TMP12:%.*]] = insertelement <2 x i16> [[TMP8]], i16 [[TMP11]], i64 1
5476; IND-NEXT:    br label [[PRED_UREM_CONTINUE4]]
5477; IND:       pred.urem.continue4:
5478; IND-NEXT:    [[TMP13:%.*]] = phi <2 x i16> [ [[TMP8]], [[PRED_UREM_CONTINUE]] ], [ [[TMP12]], [[PRED_UREM_IF3]] ]
5479; IND-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP13]]
5480; IND-NEXT:    [[TMP14:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32>
5481; IND-NEXT:    [[TMP15]] = or <2 x i32> [[VEC_PHI]], [[TMP14]]
5482; IND-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
5483; IND-NEXT:    [[VEC_IND_NEXT2]] = add <2 x i16> [[VEC_IND1]], <i16 2, i16 2>
5484; IND-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
5485; IND-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
5486; IND:       middle.block:
5487; IND-NEXT:    [[TMP17:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[TMP15]])
5488; IND-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
5489; IND:       scalar.ph:
5490; IND-NEXT:    br label [[FOR_BODY:%.*]]
5491; IND:       for.body:
5492; IND-NEXT:    br i1 undef, label [[FOR_INC:%.*]], label [[FOR_COND:%.*]]
5493; IND:       for.cond:
5494; IND-NEXT:    br label [[FOR_INC]]
5495; IND:       for.inc:
5496; IND-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
5497; IND:       for.end:
5498; IND-NEXT:    [[VAR7:%.*]] = phi i32 [ undef, [[FOR_INC]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
5499; IND-NEXT:    ret i32 [[VAR7]]
5500;
5501; UNROLL-LABEL: @PR32419(
5502; UNROLL-NEXT:  entry:
5503; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5504; UNROLL:       vector.ph:
5505; UNROLL-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> <i32 poison, i32 0>, i32 [[A:%.*]], i64 0
5506; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
5507; UNROLL:       vector.body:
5508; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE12:%.*]] ]
5509; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_UREM_CONTINUE12]] ]
5510; UNROLL-NEXT:    [[VEC_PHI2:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UREM_CONTINUE12]] ]
5511; UNROLL-NEXT:    [[VEC_IND3:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[PRED_UREM_CONTINUE12]] ]
5512; UNROLL-NEXT:    [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16
5513; UNROLL-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND3]], zeroinitializer
5514; UNROLL-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i16> [[VEC_IND3]], <i16 -2, i16 -2>
5515; UNROLL-NEXT:    [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true>
5516; UNROLL-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
5517; UNROLL-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i64 0
5518; UNROLL-NEXT:    br i1 [[TMP6]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
5519; UNROLL:       pred.urem.if:
5520; UNROLL-NEXT:    [[TMP7:%.*]] = add i16 [[TMP1]], -20
5521; UNROLL-NEXT:    [[TMP8:%.*]] = urem i16 [[B:%.*]], [[TMP7]]
5522; UNROLL-NEXT:    [[TMP9:%.*]] = insertelement <2 x i16> poison, i16 [[TMP8]], i64 0
5523; UNROLL-NEXT:    br label [[PRED_UREM_CONTINUE]]
5524; UNROLL:       pred.urem.continue:
5525; UNROLL-NEXT:    [[TMP10:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UREM_IF]] ]
5526; UNROLL-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i64 1
5527; UNROLL-NEXT:    br i1 [[TMP11]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8:%.*]]
5528; UNROLL:       pred.urem.if7:
5529; UNROLL-NEXT:    [[TMP12:%.*]] = add i16 [[TMP1]], -19
5530; UNROLL-NEXT:    [[TMP13:%.*]] = urem i16 [[B]], [[TMP12]]
5531; UNROLL-NEXT:    [[TMP14:%.*]] = insertelement <2 x i16> [[TMP10]], i16 [[TMP13]], i64 1
5532; UNROLL-NEXT:    br label [[PRED_UREM_CONTINUE8]]
5533; UNROLL:       pred.urem.continue8:
5534; UNROLL-NEXT:    [[TMP15:%.*]] = phi <2 x i16> [ [[TMP10]], [[PRED_UREM_CONTINUE]] ], [ [[TMP14]], [[PRED_UREM_IF7]] ]
5535; UNROLL-NEXT:    [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i64 0
5536; UNROLL-NEXT:    br i1 [[TMP16]], label [[PRED_UREM_IF9:%.*]], label [[PRED_UREM_CONTINUE10:%.*]]
5537; UNROLL:       pred.urem.if9:
5538; UNROLL-NEXT:    [[TMP17:%.*]] = add i16 [[TMP1]], -18
5539; UNROLL-NEXT:    [[TMP18:%.*]] = urem i16 [[B]], [[TMP17]]
5540; UNROLL-NEXT:    [[TMP19:%.*]] = insertelement <2 x i16> poison, i16 [[TMP18]], i64 0
5541; UNROLL-NEXT:    br label [[PRED_UREM_CONTINUE10]]
5542; UNROLL:       pred.urem.continue10:
5543; UNROLL-NEXT:    [[TMP20:%.*]] = phi <2 x i16> [ poison, [[PRED_UREM_CONTINUE8]] ], [ [[TMP19]], [[PRED_UREM_IF9]] ]
5544; UNROLL-NEXT:    [[TMP21:%.*]] = extractelement <2 x i1> [[TMP5]], i64 1
5545; UNROLL-NEXT:    br i1 [[TMP21]], label [[PRED_UREM_IF11:%.*]], label [[PRED_UREM_CONTINUE12]]
5546; UNROLL:       pred.urem.if11:
5547; UNROLL-NEXT:    [[TMP22:%.*]] = add i16 [[TMP1]], -17
5548; UNROLL-NEXT:    [[TMP23:%.*]] = urem i16 [[B]], [[TMP22]]
5549; UNROLL-NEXT:    [[TMP24:%.*]] = insertelement <2 x i16> [[TMP20]], i16 [[TMP23]], i64 1
5550; UNROLL-NEXT:    br label [[PRED_UREM_CONTINUE12]]
5551; UNROLL:       pred.urem.continue12:
5552; UNROLL-NEXT:    [[TMP25:%.*]] = phi <2 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE10]] ], [ [[TMP24]], [[PRED_UREM_IF11]] ]
5553; UNROLL-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP15]]
5554; UNROLL-NEXT:    [[PREDPHI13:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> zeroinitializer, <2 x i16> [[TMP25]]
5555; UNROLL-NEXT:    [[TMP26:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32>
5556; UNROLL-NEXT:    [[TMP27:%.*]] = sext <2 x i16> [[PREDPHI13]] to <2 x i32>
5557; UNROLL-NEXT:    [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]]
5558; UNROLL-NEXT:    [[TMP29]] = or <2 x i32> [[VEC_PHI2]], [[TMP27]]
5559; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
5560; UNROLL-NEXT:    [[VEC_IND_NEXT6]] = add <2 x i16> [[VEC_IND3]], <i16 4, i16 4>
5561; UNROLL-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
5562; UNROLL-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
5563; UNROLL:       middle.block:
5564; UNROLL-NEXT:    [[BIN_RDX:%.*]] = or <2 x i32> [[TMP29]], [[TMP28]]
5565; UNROLL-NEXT:    [[TMP31:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]])
5566; UNROLL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
5567; UNROLL:       scalar.ph:
5568; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
5569; UNROLL:       for.body:
5570; UNROLL-NEXT:    br i1 undef, label [[FOR_INC:%.*]], label [[FOR_COND:%.*]]
5571; UNROLL:       for.cond:
5572; UNROLL-NEXT:    br label [[FOR_INC]]
5573; UNROLL:       for.inc:
5574; UNROLL-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
5575; UNROLL:       for.end:
5576; UNROLL-NEXT:    [[VAR7:%.*]] = phi i32 [ undef, [[FOR_INC]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
5577; UNROLL-NEXT:    ret i32 [[VAR7]]
5578;
5579; UNROLL-NO-IC-LABEL: @PR32419(
5580; UNROLL-NO-IC-NEXT:  entry:
5581; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5582; UNROLL-NO-IC:       vector.ph:
5583; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = insertelement <2 x i32> zeroinitializer, i32 [[A:%.*]], i32 0
5584; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
5585; UNROLL-NO-IC:       vector.body:
5586; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE12:%.*]] ]
5587; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ <i32 -20, i32 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_UREM_CONTINUE12]] ]
5588; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_UREM_CONTINUE12]] ]
5589; UNROLL-NO-IC-NEXT:    [[VEC_PHI2:%.*]] = phi <2 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP29:%.*]], [[PRED_UREM_CONTINUE12]] ]
5590; UNROLL-NO-IC-NEXT:    [[VEC_IND3:%.*]] = phi <2 x i16> [ <i16 -20, i16 -19>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[PRED_UREM_CONTINUE12]] ]
5591; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i32> [[VEC_IND]], <i32 2, i32 2>
5592; UNROLL-NO-IC-NEXT:    [[STEP_ADD4:%.*]] = add <2 x i16> [[VEC_IND3]], <i16 2, i16 2>
5593; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i32 -20, [[INDEX]]
5594; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = trunc i32 [[OFFSET_IDX]] to i16
5595; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = icmp eq <2 x i16> [[VEC_IND3]], zeroinitializer
5596; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = icmp eq <2 x i16> [[STEP_ADD4]], zeroinitializer
5597; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = xor <2 x i1> [[TMP2]], <i1 true, i1 true>
5598; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = xor <2 x i1> [[TMP3]], <i1 true, i1 true>
5599; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP4]], i32 0
5600; UNROLL-NO-IC-NEXT:    br i1 [[TMP6]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
5601; UNROLL-NO-IC:       pred.urem.if:
5602; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add i16 [[TMP1]], 0
5603; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = urem i16 [[B:%.*]], [[TMP7]]
5604; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = insertelement <2 x i16> poison, i16 [[TMP8]], i32 0
5605; UNROLL-NO-IC-NEXT:    br label [[PRED_UREM_CONTINUE]]
5606; UNROLL-NO-IC:       pred.urem.continue:
5607; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = phi <2 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UREM_IF]] ]
5608; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = extractelement <2 x i1> [[TMP4]], i32 1
5609; UNROLL-NO-IC-NEXT:    br i1 [[TMP11]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8:%.*]]
5610; UNROLL-NO-IC:       pred.urem.if7:
5611; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = add i16 [[TMP1]], 1
5612; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = urem i16 [[B]], [[TMP12]]
5613; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = insertelement <2 x i16> [[TMP10]], i16 [[TMP13]], i32 1
5614; UNROLL-NO-IC-NEXT:    br label [[PRED_UREM_CONTINUE8]]
5615; UNROLL-NO-IC:       pred.urem.continue8:
5616; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = phi <2 x i16> [ [[TMP10]], [[PRED_UREM_CONTINUE]] ], [ [[TMP14]], [[PRED_UREM_IF7]] ]
5617; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = extractelement <2 x i1> [[TMP5]], i32 0
5618; UNROLL-NO-IC-NEXT:    br i1 [[TMP16]], label [[PRED_UREM_IF9:%.*]], label [[PRED_UREM_CONTINUE10:%.*]]
5619; UNROLL-NO-IC:       pred.urem.if9:
5620; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = add i16 [[TMP1]], 2
5621; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = urem i16 [[B]], [[TMP17]]
5622; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = insertelement <2 x i16> poison, i16 [[TMP18]], i32 0
5623; UNROLL-NO-IC-NEXT:    br label [[PRED_UREM_CONTINUE10]]
5624; UNROLL-NO-IC:       pred.urem.continue10:
5625; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = phi <2 x i16> [ poison, [[PRED_UREM_CONTINUE8]] ], [ [[TMP19]], [[PRED_UREM_IF9]] ]
5626; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = extractelement <2 x i1> [[TMP5]], i32 1
5627; UNROLL-NO-IC-NEXT:    br i1 [[TMP21]], label [[PRED_UREM_IF11:%.*]], label [[PRED_UREM_CONTINUE12]]
5628; UNROLL-NO-IC:       pred.urem.if11:
5629; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = add i16 [[TMP1]], 3
5630; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = urem i16 [[B]], [[TMP22]]
5631; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = insertelement <2 x i16> [[TMP20]], i16 [[TMP23]], i32 1
5632; UNROLL-NO-IC-NEXT:    br label [[PRED_UREM_CONTINUE12]]
5633; UNROLL-NO-IC:       pred.urem.continue12:
5634; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = phi <2 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE10]] ], [ [[TMP24]], [[PRED_UREM_IF11]] ]
5635; UNROLL-NO-IC-NEXT:    [[PREDPHI:%.*]] = select <2 x i1> [[TMP2]], <2 x i16> zeroinitializer, <2 x i16> [[TMP15]]
5636; UNROLL-NO-IC-NEXT:    [[PREDPHI13:%.*]] = select <2 x i1> [[TMP3]], <2 x i16> zeroinitializer, <2 x i16> [[TMP25]]
5637; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = sext <2 x i16> [[PREDPHI]] to <2 x i32>
5638; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = sext <2 x i16> [[PREDPHI13]] to <2 x i32>
5639; UNROLL-NO-IC-NEXT:    [[TMP28]] = or <2 x i32> [[VEC_PHI]], [[TMP26]]
5640; UNROLL-NO-IC-NEXT:    [[TMP29]] = or <2 x i32> [[VEC_PHI2]], [[TMP27]]
5641; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
5642; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], <i32 2, i32 2>
5643; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT6]] = add <2 x i16> [[STEP_ADD4]], <i16 2, i16 2>
5644; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 20
5645; UNROLL-NO-IC-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
5646; UNROLL-NO-IC:       middle.block:
5647; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = or <2 x i32> [[TMP29]], [[TMP28]]
5648; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = call i32 @llvm.vector.reduce.or.v2i32(<2 x i32> [[BIN_RDX]])
5649; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 20, 20
5650; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5651; UNROLL-NO-IC:       scalar.ph:
5652; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 0, [[MIDDLE_BLOCK]] ], [ -20, [[ENTRY:%.*]] ]
5653; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[A]], [[ENTRY]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
5654; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
5655; UNROLL-NO-IC:       for.body:
5656; UNROLL-NO-IC-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ]
5657; UNROLL-NO-IC-NEXT:    [[VAR0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ]
5658; UNROLL-NO-IC-NEXT:    [[VAR1:%.*]] = trunc i32 [[I]] to i16
5659; UNROLL-NO-IC-NEXT:    [[VAR2:%.*]] = icmp eq i16 [[VAR1]], 0
5660; UNROLL-NO-IC-NEXT:    br i1 [[VAR2]], label [[FOR_INC]], label [[FOR_COND:%.*]]
5661; UNROLL-NO-IC:       for.cond:
5662; UNROLL-NO-IC-NEXT:    [[VAR3:%.*]] = urem i16 [[B]], [[VAR1]]
5663; UNROLL-NO-IC-NEXT:    br label [[FOR_INC]]
5664; UNROLL-NO-IC:       for.inc:
5665; UNROLL-NO-IC-NEXT:    [[VAR4:%.*]] = phi i16 [ [[VAR3]], [[FOR_COND]] ], [ 0, [[FOR_BODY]] ]
5666; UNROLL-NO-IC-NEXT:    [[VAR5:%.*]] = sext i16 [[VAR4]] to i32
5667; UNROLL-NO-IC-NEXT:    [[VAR6]] = or i32 [[VAR0]], [[VAR5]]
5668; UNROLL-NO-IC-NEXT:    [[I_NEXT]] = add nsw i32 [[I]], 1
5669; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0
5670; UNROLL-NO-IC-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
5671; UNROLL-NO-IC:       for.end:
5672; UNROLL-NO-IC-NEXT:    [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP31]], [[MIDDLE_BLOCK]] ]
5673; UNROLL-NO-IC-NEXT:    ret i32 [[VAR7]]
5674;
5675; INTERLEAVE-LABEL: @PR32419(
5676; INTERLEAVE-NEXT:  entry:
5677; INTERLEAVE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5678; INTERLEAVE:       vector.ph:
5679; INTERLEAVE-NEXT:    [[TMP0:%.*]] = insertelement <4 x i32> <i32 poison, i32 0, i32 0, i32 0>, i32 [[A:%.*]], i64 0
5680; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
5681; INTERLEAVE:       vector.body:
5682; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UREM_CONTINUE20:%.*]] ]
5683; INTERLEAVE-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[PRED_UREM_CONTINUE20]] ]
5684; INTERLEAVE-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[PRED_UREM_CONTINUE20]] ]
5685; INTERLEAVE-NEXT:    [[VEC_IND3:%.*]] = phi <4 x i16> [ <i16 -20, i16 -19, i16 -18, i16 -17>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[PRED_UREM_CONTINUE20]] ]
5686; INTERLEAVE-NEXT:    [[TMP1:%.*]] = trunc i32 [[INDEX]] to i16
5687; INTERLEAVE-NEXT:    [[TMP2:%.*]] = icmp eq <4 x i16> [[VEC_IND3]], zeroinitializer
5688; INTERLEAVE-NEXT:    [[TMP3:%.*]] = icmp eq <4 x i16> [[VEC_IND3]], <i16 -4, i16 -4, i16 -4, i16 -4>
5689; INTERLEAVE-NEXT:    [[TMP4:%.*]] = xor <4 x i1> [[TMP2]], <i1 true, i1 true, i1 true, i1 true>
5690; INTERLEAVE-NEXT:    [[TMP5:%.*]] = xor <4 x i1> [[TMP3]], <i1 true, i1 true, i1 true, i1 true>
5691; INTERLEAVE-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0
5692; INTERLEAVE-NEXT:    br i1 [[TMP6]], label [[PRED_UREM_IF:%.*]], label [[PRED_UREM_CONTINUE:%.*]]
5693; INTERLEAVE:       pred.urem.if:
5694; INTERLEAVE-NEXT:    [[TMP7:%.*]] = add i16 [[TMP1]], -20
5695; INTERLEAVE-NEXT:    [[TMP8:%.*]] = urem i16 [[B:%.*]], [[TMP7]]
5696; INTERLEAVE-NEXT:    [[TMP9:%.*]] = insertelement <4 x i16> poison, i16 [[TMP8]], i64 0
5697; INTERLEAVE-NEXT:    br label [[PRED_UREM_CONTINUE]]
5698; INTERLEAVE:       pred.urem.continue:
5699; INTERLEAVE-NEXT:    [[TMP10:%.*]] = phi <4 x i16> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UREM_IF]] ]
5700; INTERLEAVE-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1
5701; INTERLEAVE-NEXT:    br i1 [[TMP11]], label [[PRED_UREM_IF7:%.*]], label [[PRED_UREM_CONTINUE8:%.*]]
5702; INTERLEAVE:       pred.urem.if7:
5703; INTERLEAVE-NEXT:    [[TMP12:%.*]] = add i16 [[TMP1]], -19
5704; INTERLEAVE-NEXT:    [[TMP13:%.*]] = urem i16 [[B]], [[TMP12]]
5705; INTERLEAVE-NEXT:    [[TMP14:%.*]] = insertelement <4 x i16> [[TMP10]], i16 [[TMP13]], i64 1
5706; INTERLEAVE-NEXT:    br label [[PRED_UREM_CONTINUE8]]
5707; INTERLEAVE:       pred.urem.continue8:
5708; INTERLEAVE-NEXT:    [[TMP15:%.*]] = phi <4 x i16> [ [[TMP10]], [[PRED_UREM_CONTINUE]] ], [ [[TMP14]], [[PRED_UREM_IF7]] ]
5709; INTERLEAVE-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2
5710; INTERLEAVE-NEXT:    br i1 [[TMP16]], label [[PRED_UREM_IF9:%.*]], label [[PRED_UREM_CONTINUE10:%.*]]
5711; INTERLEAVE:       pred.urem.if9:
5712; INTERLEAVE-NEXT:    [[TMP17:%.*]] = add i16 [[TMP1]], -18
5713; INTERLEAVE-NEXT:    [[TMP18:%.*]] = urem i16 [[B]], [[TMP17]]
5714; INTERLEAVE-NEXT:    [[TMP19:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP18]], i64 2
5715; INTERLEAVE-NEXT:    br label [[PRED_UREM_CONTINUE10]]
5716; INTERLEAVE:       pred.urem.continue10:
5717; INTERLEAVE-NEXT:    [[TMP20:%.*]] = phi <4 x i16> [ [[TMP15]], [[PRED_UREM_CONTINUE8]] ], [ [[TMP19]], [[PRED_UREM_IF9]] ]
5718; INTERLEAVE-NEXT:    [[TMP21:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3
5719; INTERLEAVE-NEXT:    br i1 [[TMP21]], label [[PRED_UREM_IF11:%.*]], label [[PRED_UREM_CONTINUE12:%.*]]
5720; INTERLEAVE:       pred.urem.if11:
5721; INTERLEAVE-NEXT:    [[TMP22:%.*]] = add i16 [[TMP1]], -17
5722; INTERLEAVE-NEXT:    [[TMP23:%.*]] = urem i16 [[B]], [[TMP22]]
5723; INTERLEAVE-NEXT:    [[TMP24:%.*]] = insertelement <4 x i16> [[TMP20]], i16 [[TMP23]], i64 3
5724; INTERLEAVE-NEXT:    br label [[PRED_UREM_CONTINUE12]]
5725; INTERLEAVE:       pred.urem.continue12:
5726; INTERLEAVE-NEXT:    [[TMP25:%.*]] = phi <4 x i16> [ [[TMP20]], [[PRED_UREM_CONTINUE10]] ], [ [[TMP24]], [[PRED_UREM_IF11]] ]
5727; INTERLEAVE-NEXT:    [[TMP26:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0
5728; INTERLEAVE-NEXT:    br i1 [[TMP26]], label [[PRED_UREM_IF13:%.*]], label [[PRED_UREM_CONTINUE14:%.*]]
5729; INTERLEAVE:       pred.urem.if13:
5730; INTERLEAVE-NEXT:    [[TMP27:%.*]] = add i16 [[TMP1]], -16
5731; INTERLEAVE-NEXT:    [[TMP28:%.*]] = urem i16 [[B]], [[TMP27]]
5732; INTERLEAVE-NEXT:    [[TMP29:%.*]] = insertelement <4 x i16> poison, i16 [[TMP28]], i64 0
5733; INTERLEAVE-NEXT:    br label [[PRED_UREM_CONTINUE14]]
5734; INTERLEAVE:       pred.urem.continue14:
5735; INTERLEAVE-NEXT:    [[TMP30:%.*]] = phi <4 x i16> [ poison, [[PRED_UREM_CONTINUE12]] ], [ [[TMP29]], [[PRED_UREM_IF13]] ]
5736; INTERLEAVE-NEXT:    [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1
5737; INTERLEAVE-NEXT:    br i1 [[TMP31]], label [[PRED_UREM_IF15:%.*]], label [[PRED_UREM_CONTINUE16:%.*]]
5738; INTERLEAVE:       pred.urem.if15:
5739; INTERLEAVE-NEXT:    [[TMP32:%.*]] = add i16 [[TMP1]], -15
5740; INTERLEAVE-NEXT:    [[TMP33:%.*]] = urem i16 [[B]], [[TMP32]]
5741; INTERLEAVE-NEXT:    [[TMP34:%.*]] = insertelement <4 x i16> [[TMP30]], i16 [[TMP33]], i64 1
5742; INTERLEAVE-NEXT:    br label [[PRED_UREM_CONTINUE16]]
5743; INTERLEAVE:       pred.urem.continue16:
5744; INTERLEAVE-NEXT:    [[TMP35:%.*]] = phi <4 x i16> [ [[TMP30]], [[PRED_UREM_CONTINUE14]] ], [ [[TMP34]], [[PRED_UREM_IF15]] ]
5745; INTERLEAVE-NEXT:    [[TMP36:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2
5746; INTERLEAVE-NEXT:    br i1 [[TMP36]], label [[PRED_UREM_IF17:%.*]], label [[PRED_UREM_CONTINUE18:%.*]]
5747; INTERLEAVE:       pred.urem.if17:
5748; INTERLEAVE-NEXT:    [[TMP37:%.*]] = add i16 [[TMP1]], -14
5749; INTERLEAVE-NEXT:    [[TMP38:%.*]] = urem i16 [[B]], [[TMP37]]
5750; INTERLEAVE-NEXT:    [[TMP39:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP38]], i64 2
5751; INTERLEAVE-NEXT:    br label [[PRED_UREM_CONTINUE18]]
5752; INTERLEAVE:       pred.urem.continue18:
5753; INTERLEAVE-NEXT:    [[TMP40:%.*]] = phi <4 x i16> [ [[TMP35]], [[PRED_UREM_CONTINUE16]] ], [ [[TMP39]], [[PRED_UREM_IF17]] ]
5754; INTERLEAVE-NEXT:    [[TMP41:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3
5755; INTERLEAVE-NEXT:    br i1 [[TMP41]], label [[PRED_UREM_IF19:%.*]], label [[PRED_UREM_CONTINUE20]]
5756; INTERLEAVE:       pred.urem.if19:
5757; INTERLEAVE-NEXT:    [[TMP42:%.*]] = add i16 [[TMP1]], -13
5758; INTERLEAVE-NEXT:    [[TMP43:%.*]] = urem i16 [[B]], [[TMP42]]
5759; INTERLEAVE-NEXT:    [[TMP44:%.*]] = insertelement <4 x i16> [[TMP40]], i16 [[TMP43]], i64 3
5760; INTERLEAVE-NEXT:    br label [[PRED_UREM_CONTINUE20]]
5761; INTERLEAVE:       pred.urem.continue20:
5762; INTERLEAVE-NEXT:    [[TMP45:%.*]] = phi <4 x i16> [ [[TMP40]], [[PRED_UREM_CONTINUE18]] ], [ [[TMP44]], [[PRED_UREM_IF19]] ]
5763; INTERLEAVE-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP2]], <4 x i16> zeroinitializer, <4 x i16> [[TMP25]]
5764; INTERLEAVE-NEXT:    [[PREDPHI21:%.*]] = select <4 x i1> [[TMP3]], <4 x i16> zeroinitializer, <4 x i16> [[TMP45]]
5765; INTERLEAVE-NEXT:    [[TMP46:%.*]] = sext <4 x i16> [[PREDPHI]] to <4 x i32>
5766; INTERLEAVE-NEXT:    [[TMP47:%.*]] = sext <4 x i16> [[PREDPHI21]] to <4 x i32>
5767; INTERLEAVE-NEXT:    [[TMP48]] = or <4 x i32> [[VEC_PHI]], [[TMP46]]
5768; INTERLEAVE-NEXT:    [[TMP49]] = or <4 x i32> [[VEC_PHI2]], [[TMP47]]
5769; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
5770; INTERLEAVE-NEXT:    [[VEC_IND_NEXT6]] = add <4 x i16> [[VEC_IND3]], <i16 8, i16 8, i16 8, i16 8>
5771; INTERLEAVE-NEXT:    [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
5772; INTERLEAVE-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
5773; INTERLEAVE:       middle.block:
5774; INTERLEAVE-NEXT:    [[BIN_RDX:%.*]] = or <4 x i32> [[TMP49]], [[TMP48]]
5775; INTERLEAVE-NEXT:    [[TMP51:%.*]] = call i32 @llvm.vector.reduce.or.v4i32(<4 x i32> [[BIN_RDX]])
5776; INTERLEAVE-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
5777; INTERLEAVE:       scalar.ph:
5778; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ -4, [[MIDDLE_BLOCK]] ], [ -20, [[ENTRY:%.*]] ]
5779; INTERLEAVE-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP51]], [[MIDDLE_BLOCK]] ], [ [[A]], [[ENTRY]] ]
5780; INTERLEAVE-NEXT:    br label [[FOR_BODY:%.*]]
5781; INTERLEAVE:       for.body:
5782; INTERLEAVE-NEXT:    [[I:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ]
5783; INTERLEAVE-NEXT:    [[VAR0:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[VAR6:%.*]], [[FOR_INC]] ]
5784; INTERLEAVE-NEXT:    [[VAR1:%.*]] = trunc i32 [[I]] to i16
5785; INTERLEAVE-NEXT:    [[VAR2:%.*]] = icmp eq i16 [[VAR1]], 0
5786; INTERLEAVE-NEXT:    br i1 [[VAR2]], label [[FOR_INC]], label [[FOR_COND:%.*]]
5787; INTERLEAVE:       for.cond:
5788; INTERLEAVE-NEXT:    [[VAR3:%.*]] = urem i16 [[B]], [[VAR1]]
5789; INTERLEAVE-NEXT:    br label [[FOR_INC]]
5790; INTERLEAVE:       for.inc:
5791; INTERLEAVE-NEXT:    [[VAR4:%.*]] = phi i16 [ [[VAR3]], [[FOR_COND]] ], [ 0, [[FOR_BODY]] ]
5792; INTERLEAVE-NEXT:    [[VAR5:%.*]] = sext i16 [[VAR4]] to i32
5793; INTERLEAVE-NEXT:    [[VAR6]] = or i32 [[VAR0]], [[VAR5]]
5794; INTERLEAVE-NEXT:    [[I_NEXT]] = add nsw i32 [[I]], 1
5795; INTERLEAVE-NEXT:    [[COND:%.*]] = icmp eq i32 [[I_NEXT]], 0
5796; INTERLEAVE-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
5797; INTERLEAVE:       for.end:
5798; INTERLEAVE-NEXT:    [[VAR7:%.*]] = phi i32 [ [[VAR6]], [[FOR_INC]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
5799; INTERLEAVE-NEXT:    ret i32 [[VAR7]]
5800;
5801entry:
5802  br label %for.body
5803
5804for.body:
5805  %i = phi i32 [ -20, %entry ], [ %i.next, %for.inc ]
5806  %var0 = phi i32 [ %a, %entry ], [ %var6, %for.inc ]
5807  %var1 = trunc i32 %i to i16
5808  %var2 = icmp eq i16 %var1, 0
5809  br i1 %var2, label %for.inc, label %for.cond
5810
5811for.cond:
5812  %var3 = urem i16 %b, %var1
5813  br label %for.inc
5814
5815for.inc:
5816  %var4 = phi i16 [ %var3, %for.cond ], [ 0, %for.body ]
5817  %var5 = sext i16 %var4 to i32
5818  %var6 = or i32 %var0, %var5
5819  %i.next = add nsw i32 %i, 1
5820  %cond = icmp eq i32 %i.next, 0
5821  br i1 %cond, label %for.end, label %for.body
5822
5823for.end:
5824  %var7 = phi i32 [ %var6, %for.inc ]
5825  ret i32 %var7
5826}
5827
5828; Ensure that the shufflevector for a first-order recurrence is inserted
5829; correctly after all the phis. These new phis correspond to the new IVs
5830; that are generated by optimizing non-free truncs of IVs into IVs themselves.
5831; This also ensures that the first-order recurrence splice recipe is placed
5832; correctly if it is fed by an induction.
5833define i64 @trunc_with_first_order_recurrence() {
5834; CHECK-LABEL: @trunc_with_first_order_recurrence(
5835; CHECK-NEXT:  entry:
5836; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5837; CHECK:       vector.ph:
5838; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
5839; CHECK:       vector.body:
5840; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5841; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
5842; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5843; CHECK-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
5844; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[VEC_IND4:%.*]], [[VECTOR_BODY]] ]
5845; CHECK-NEXT:    [[VEC_IND4]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
5846; CHECK-NEXT:    [[VEC_IND6:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
5847; CHECK-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND4]], <2 x i32> <i32 1, i32 2>
5848; CHECK-NEXT:    [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND2]], [[VEC_IND4]]
5849; CHECK-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 42, i32 42>
5850; CHECK-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND4]]
5851; CHECK-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]]
5852; CHECK-NEXT:    [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
5853; CHECK-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]]
5854; CHECK-NEXT:    [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND6]], <i32 1, i32 1>
5855; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]]
5856; CHECK-NEXT:    [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
5857; CHECK-NEXT:    [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]]
5858; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
5859; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
5860; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2>
5861; CHECK-NEXT:    [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2>
5862; CHECK-NEXT:    [[VEC_IND_NEXT7]] = add <2 x i32> [[VEC_IND6]], <i32 2, i32 2>
5863; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
5864; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
5865; CHECK:       middle.block:
5866; CHECK-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP10]])
5867; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 113, 112
5868; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND4]], i32 1
5869; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[VEC_IND4]], i32 0
5870; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
5871; CHECK:       scalar.ph:
5872; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5873; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
5874; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
5875; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
5876; CHECK-NEXT:    br label [[LOOP:%.*]]
5877; CHECK:       exit:
5878; CHECK-NEXT:    [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
5879; CHECK-NEXT:    ret i64 [[DOTLCSSA]]
5880; CHECK:       loop:
5881; CHECK-NEXT:    [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
5882; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5883; CHECK-NEXT:    [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5884; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
5885; CHECK-NEXT:    [[C6]] = trunc i64 [[INDVARS_IV]] to i32
5886; CHECK-NEXT:    [[C8:%.*]] = mul i32 [[X]], [[C6]]
5887; CHECK-NEXT:    [[C9:%.*]] = add i32 [[C8]], 42
5888; CHECK-NEXT:    [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]]
5889; CHECK-NEXT:    [[C11:%.*]] = add i32 [[C10]], [[C9]]
5890; CHECK-NEXT:    [[C12:%.*]] = sext i32 [[C11]] to i64
5891; CHECK-NEXT:    [[C13:%.*]] = add i64 [[C5]], [[C12]]
5892; CHECK-NEXT:    [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
5893; CHECK-NEXT:    [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1
5894; CHECK-NEXT:    [[C15:%.*]] = add i32 [[C9]], [[C14]]
5895; CHECK-NEXT:    [[C16:%.*]] = sext i32 [[C15]] to i64
5896; CHECK-NEXT:    [[C23]] = add i64 [[C13]], [[C16]]
5897; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
5898; CHECK-NEXT:    [[C24]] = add nuw nsw i32 [[X]], 1
5899; CHECK-NEXT:    [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
5900; CHECK-NEXT:    br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
5901;
5902; IND-LABEL: @trunc_with_first_order_recurrence(
5903; IND-NEXT:  entry:
5904; IND-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5905; IND:       vector.ph:
5906; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
5907; IND:       vector.body:
5908; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5909; IND-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
5910; IND-NEXT:    [[VEC_IND2:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
5911; IND-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[VEC_IND4:%.*]], [[VECTOR_BODY]] ]
5912; IND-NEXT:    [[VEC_IND4]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT5:%.*]], [[VECTOR_BODY]] ]
5913; IND-NEXT:    [[VEC_IND6:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
5914; IND-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND4]], <2 x i32> <i32 1, i32 2>
5915; IND-NEXT:    [[TMP1:%.*]] = mul <2 x i32> [[VEC_IND2]], [[VEC_IND4]]
5916; IND-NEXT:    [[TMP2:%.*]] = add <2 x i32> [[TMP1]], <i32 42, i32 42>
5917; IND-NEXT:    [[TMP3:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND4]]
5918; IND-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP3]], [[TMP2]]
5919; IND-NEXT:    [[TMP5:%.*]] = sext <2 x i32> [[TMP4]] to <2 x i64>
5920; IND-NEXT:    [[TMP6:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP5]]
5921; IND-NEXT:    [[TMP7:%.*]] = shl <2 x i32> [[VEC_IND6]], <i32 1, i32 1>
5922; IND-NEXT:    [[TMP8:%.*]] = add <2 x i32> [[TMP2]], [[TMP7]]
5923; IND-NEXT:    [[TMP9:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
5924; IND-NEXT:    [[TMP10]] = add <2 x i64> [[TMP6]], [[TMP9]]
5925; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
5926; IND-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2>
5927; IND-NEXT:    [[VEC_IND_NEXT5]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2>
5928; IND-NEXT:    [[VEC_IND_NEXT7]] = add <2 x i32> [[VEC_IND6]], <i32 2, i32 2>
5929; IND-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
5930; IND-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
5931; IND:       middle.block:
5932; IND-NEXT:    [[TMP12:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[TMP10]])
5933; IND-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND4]], i64 1
5934; IND-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
5935; IND:       scalar.ph:
5936; IND-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5937; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ]
5938; IND-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ]
5939; IND-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
5940; IND-NEXT:    br label [[LOOP:%.*]]
5941; IND:       exit:
5942; IND-NEXT:    [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
5943; IND-NEXT:    ret i64 [[DOTLCSSA]]
5944; IND:       loop:
5945; IND-NEXT:    [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
5946; IND-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5947; IND-NEXT:    [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5948; IND-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
5949; IND-NEXT:    [[C6]] = trunc i64 [[INDVARS_IV]] to i32
5950; IND-NEXT:    [[C8:%.*]] = mul i32 [[X]], [[C6]]
5951; IND-NEXT:    [[C9:%.*]] = add i32 [[C8]], 42
5952; IND-NEXT:    [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]]
5953; IND-NEXT:    [[C11:%.*]] = add i32 [[C10]], [[C9]]
5954; IND-NEXT:    [[C12:%.*]] = sext i32 [[C11]] to i64
5955; IND-NEXT:    [[C13:%.*]] = add i64 [[C5]], [[C12]]
5956; IND-NEXT:    [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
5957; IND-NEXT:    [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1
5958; IND-NEXT:    [[C15:%.*]] = add i32 [[C9]], [[C14]]
5959; IND-NEXT:    [[C16:%.*]] = sext i32 [[C15]] to i64
5960; IND-NEXT:    [[C23]] = add i64 [[C13]], [[C16]]
5961; IND-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
5962; IND-NEXT:    [[C24]] = add nuw nsw i32 [[X]], 1
5963; IND-NEXT:    [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
5964; IND-NEXT:    br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
5965;
5966; UNROLL-LABEL: @trunc_with_first_order_recurrence(
5967; UNROLL-NEXT:  entry:
5968; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5969; UNROLL:       vector.ph:
5970; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
5971; UNROLL:       vector.body:
5972; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5973; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
5974; UNROLL-NEXT:    [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
5975; UNROLL-NEXT:    [[VEC_IND4:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
5976; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD9:%.*]], [[VECTOR_BODY]] ]
5977; UNROLL-NEXT:    [[VEC_IND8:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ]
5978; UNROLL-NEXT:    [[VEC_IND12:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT15:%.*]], [[VECTOR_BODY]] ]
5979; UNROLL-NEXT:    [[STEP_ADD5:%.*]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2>
5980; UNROLL-NEXT:    [[STEP_ADD9]] = add <2 x i32> [[VEC_IND8]], <i32 2, i32 2>
5981; UNROLL-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND8]], <2 x i32> <i32 1, i32 2>
5982; UNROLL-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND8]], <2 x i32> [[STEP_ADD9]], <2 x i32> <i32 1, i32 2>
5983; UNROLL-NEXT:    [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND4]], [[VEC_IND8]]
5984; UNROLL-NEXT:    [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD5]], [[STEP_ADD9]]
5985; UNROLL-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP2]], <i32 42, i32 42>
5986; UNROLL-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], <i32 42, i32 42>
5987; UNROLL-NEXT:    [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND8]]
5988; UNROLL-NEXT:    [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD9]]
5989; UNROLL-NEXT:    [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]]
5990; UNROLL-NEXT:    [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP5]]
5991; UNROLL-NEXT:    [[TMP10:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
5992; UNROLL-NEXT:    [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64>
5993; UNROLL-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]]
5994; UNROLL-NEXT:    [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]]
5995; UNROLL-NEXT:    [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND12]], <i32 1, i32 1>
5996; UNROLL-NEXT:    [[STEP_ADD13:%.*]] = shl <2 x i32> [[VEC_IND12]], <i32 1, i32 1>
5997; UNROLL-NEXT:    [[TMP15:%.*]] = add <2 x i32> [[STEP_ADD13]], <i32 4, i32 4>
5998; UNROLL-NEXT:    [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]]
5999; UNROLL-NEXT:    [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]]
6000; UNROLL-NEXT:    [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64>
6001; UNROLL-NEXT:    [[TMP19:%.*]] = sext <2 x i32> [[TMP17]] to <2 x i64>
6002; UNROLL-NEXT:    [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]]
6003; UNROLL-NEXT:    [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]]
6004; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
6005; UNROLL-NEXT:    [[VEC_IND_NEXT7]] = add <2 x i32> [[VEC_IND4]], <i32 4, i32 4>
6006; UNROLL-NEXT:    [[VEC_IND_NEXT11]] = add <2 x i32> [[VEC_IND8]], <i32 4, i32 4>
6007; UNROLL-NEXT:    [[VEC_IND_NEXT15]] = add <2 x i32> [[VEC_IND12]], <i32 4, i32 4>
6008; UNROLL-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
6009; UNROLL-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
6010; UNROLL:       middle.block:
6011; UNROLL-NEXT:    [[BIN_RDX:%.*]] = add <2 x i64> [[TMP21]], [[TMP20]]
6012; UNROLL-NEXT:    [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
6013; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD9]], i64 1
6014; UNROLL-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
6015; UNROLL:       scalar.ph:
6016; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6017; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ]
6018; UNROLL-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ]
6019; UNROLL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
6020; UNROLL-NEXT:    br label [[LOOP:%.*]]
6021; UNROLL:       exit:
6022; UNROLL-NEXT:    [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
6023; UNROLL-NEXT:    ret i64 [[DOTLCSSA]]
6024; UNROLL:       loop:
6025; UNROLL-NEXT:    [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
6026; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
6027; UNROLL-NEXT:    [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
6028; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
6029; UNROLL-NEXT:    [[C6]] = trunc i64 [[INDVARS_IV]] to i32
6030; UNROLL-NEXT:    [[C8:%.*]] = mul i32 [[X]], [[C6]]
6031; UNROLL-NEXT:    [[C9:%.*]] = add i32 [[C8]], 42
6032; UNROLL-NEXT:    [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]]
6033; UNROLL-NEXT:    [[C11:%.*]] = add i32 [[C10]], [[C9]]
6034; UNROLL-NEXT:    [[C12:%.*]] = sext i32 [[C11]] to i64
6035; UNROLL-NEXT:    [[C13:%.*]] = add i64 [[C5]], [[C12]]
6036; UNROLL-NEXT:    [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
6037; UNROLL-NEXT:    [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1
6038; UNROLL-NEXT:    [[C15:%.*]] = add i32 [[C9]], [[C14]]
6039; UNROLL-NEXT:    [[C16:%.*]] = sext i32 [[C15]] to i64
6040; UNROLL-NEXT:    [[C23]] = add i64 [[C13]], [[C16]]
6041; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
6042; UNROLL-NEXT:    [[C24]] = add nuw nsw i32 [[X]], 1
6043; UNROLL-NEXT:    [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
6044; UNROLL-NEXT:    br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
6045;
6046; UNROLL-NO-IC-LABEL: @trunc_with_first_order_recurrence(
6047; UNROLL-NO-IC-NEXT:  entry:
6048; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6049; UNROLL-NO-IC:       vector.ph:
6050; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
6051; UNROLL-NO-IC:       vector.body:
6052; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6053; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
6054; UNROLL-NO-IC-NEXT:    [[VEC_PHI2:%.*]] = phi <2 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
6055; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 1, i64 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6056; UNROLL-NO-IC-NEXT:    [[VEC_IND4:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
6057; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD9:%.*]], [[VECTOR_BODY]] ]
6058; UNROLL-NO-IC-NEXT:    [[VEC_IND8:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ]
6059; UNROLL-NO-IC-NEXT:    [[VEC_IND12:%.*]] = phi <2 x i32> [ <i32 1, i32 2>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT15:%.*]], [[VECTOR_BODY]] ]
6060; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
6061; UNROLL-NO-IC-NEXT:    [[STEP_ADD5:%.*]] = add <2 x i32> [[VEC_IND4]], <i32 2, i32 2>
6062; UNROLL-NO-IC-NEXT:    [[STEP_ADD9]] = add <2 x i32> [[VEC_IND8]], <i32 2, i32 2>
6063; UNROLL-NO-IC-NEXT:    [[STEP_ADD13:%.*]] = add <2 x i32> [[VEC_IND12]], <i32 2, i32 2>
6064; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND8]], <2 x i32> <i32 1, i32 2>
6065; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND8]], <2 x i32> [[STEP_ADD9]], <2 x i32> <i32 1, i32 2>
6066; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = mul <2 x i32> [[VEC_IND4]], [[VEC_IND8]]
6067; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = mul <2 x i32> [[STEP_ADD5]], [[STEP_ADD9]]
6068; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add <2 x i32> [[TMP2]], <i32 42, i32 42>
6069; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[TMP3]], <i32 42, i32 42>
6070; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add <2 x i32> [[TMP0]], [[VEC_IND8]]
6071; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add <2 x i32> [[TMP1]], [[STEP_ADD9]]
6072; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add <2 x i32> [[TMP6]], [[TMP4]]
6073; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = add <2 x i32> [[TMP7]], [[TMP5]]
6074; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = sext <2 x i32> [[TMP8]] to <2 x i64>
6075; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = sext <2 x i32> [[TMP9]] to <2 x i64>
6076; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = add <2 x i64> [[VEC_PHI]], [[TMP10]]
6077; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = add <2 x i64> [[VEC_PHI2]], [[TMP11]]
6078; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = shl <2 x i32> [[VEC_IND12]], <i32 1, i32 1>
6079; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = shl <2 x i32> [[STEP_ADD13]], <i32 1, i32 1>
6080; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = add <2 x i32> [[TMP4]], [[TMP14]]
6081; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = add <2 x i32> [[TMP5]], [[TMP15]]
6082; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = sext <2 x i32> [[TMP16]] to <2 x i64>
6083; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = sext <2 x i32> [[TMP17]] to <2 x i64>
6084; UNROLL-NO-IC-NEXT:    [[TMP20]] = add <2 x i64> [[TMP12]], [[TMP18]]
6085; UNROLL-NO-IC-NEXT:    [[TMP21]] = add <2 x i64> [[TMP13]], [[TMP19]]
6086; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
6087; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
6088; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT7]] = add <2 x i32> [[STEP_ADD5]], <i32 2, i32 2>
6089; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT11]] = add <2 x i32> [[STEP_ADD9]], <i32 2, i32 2>
6090; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT15]] = add <2 x i32> [[STEP_ADD13]], <i32 2, i32 2>
6091; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
6092; UNROLL-NO-IC-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
6093; UNROLL-NO-IC:       middle.block:
6094; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = add <2 x i64> [[TMP21]], [[TMP20]]
6095; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v2i64(<2 x i64> [[BIN_RDX]])
6096; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 113, 112
6097; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD9]], i32 1
6098; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[STEP_ADD9]], i32 0
6099; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6100; UNROLL-NO-IC:       scalar.ph:
6101; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6102; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
6103; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 113, [[MIDDLE_BLOCK]] ], [ 1, [[ENTRY]] ]
6104; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
6105; UNROLL-NO-IC-NEXT:    br label [[LOOP:%.*]]
6106; UNROLL-NO-IC:       exit:
6107; UNROLL-NO-IC-NEXT:    [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
6108; UNROLL-NO-IC-NEXT:    ret i64 [[DOTLCSSA]]
6109; UNROLL-NO-IC:       loop:
6110; UNROLL-NO-IC-NEXT:    [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
6111; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
6112; UNROLL-NO-IC-NEXT:    [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
6113; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
6114; UNROLL-NO-IC-NEXT:    [[C6]] = trunc i64 [[INDVARS_IV]] to i32
6115; UNROLL-NO-IC-NEXT:    [[C8:%.*]] = mul i32 [[X]], [[C6]]
6116; UNROLL-NO-IC-NEXT:    [[C9:%.*]] = add i32 [[C8]], 42
6117; UNROLL-NO-IC-NEXT:    [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]]
6118; UNROLL-NO-IC-NEXT:    [[C11:%.*]] = add i32 [[C10]], [[C9]]
6119; UNROLL-NO-IC-NEXT:    [[C12:%.*]] = sext i32 [[C11]] to i64
6120; UNROLL-NO-IC-NEXT:    [[C13:%.*]] = add i64 [[C5]], [[C12]]
6121; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
6122; UNROLL-NO-IC-NEXT:    [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1
6123; UNROLL-NO-IC-NEXT:    [[C15:%.*]] = add i32 [[C9]], [[C14]]
6124; UNROLL-NO-IC-NEXT:    [[C16:%.*]] = sext i32 [[C15]] to i64
6125; UNROLL-NO-IC-NEXT:    [[C23]] = add i64 [[C13]], [[C16]]
6126; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
6127; UNROLL-NO-IC-NEXT:    [[C24]] = add nuw nsw i32 [[X]], 1
6128; UNROLL-NO-IC-NEXT:    [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
6129; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
6130;
6131; INTERLEAVE-LABEL: @trunc_with_first_order_recurrence(
6132; INTERLEAVE-NEXT:  entry:
6133; INTERLEAVE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6134; INTERLEAVE:       vector.ph:
6135; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
6136; INTERLEAVE:       vector.body:
6137; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6138; INTERLEAVE-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
6139; INTERLEAVE-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i64> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
6140; INTERLEAVE-NEXT:    [[VEC_IND4:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT7:%.*]], [[VECTOR_BODY]] ]
6141; INTERLEAVE-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 42>, [[VECTOR_PH]] ], [ [[STEP_ADD9:%.*]], [[VECTOR_BODY]] ]
6142; INTERLEAVE-NEXT:    [[VEC_IND8:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT11:%.*]], [[VECTOR_BODY]] ]
6143; INTERLEAVE-NEXT:    [[VEC_IND12:%.*]] = phi <4 x i32> [ <i32 1, i32 2, i32 3, i32 4>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT15:%.*]], [[VECTOR_BODY]] ]
6144; INTERLEAVE-NEXT:    [[STEP_ADD5:%.*]] = add <4 x i32> [[VEC_IND4]], <i32 4, i32 4, i32 4, i32 4>
6145; INTERLEAVE-NEXT:    [[STEP_ADD9]] = add <4 x i32> [[VEC_IND8]], <i32 4, i32 4, i32 4, i32 4>
6146; INTERLEAVE-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND8]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
6147; INTERLEAVE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND8]], <4 x i32> [[STEP_ADD9]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
6148; INTERLEAVE-NEXT:    [[TMP2:%.*]] = mul <4 x i32> [[VEC_IND4]], [[VEC_IND8]]
6149; INTERLEAVE-NEXT:    [[TMP3:%.*]] = mul <4 x i32> [[STEP_ADD5]], [[STEP_ADD9]]
6150; INTERLEAVE-NEXT:    [[TMP4:%.*]] = add <4 x i32> [[TMP2]], <i32 42, i32 42, i32 42, i32 42>
6151; INTERLEAVE-NEXT:    [[TMP5:%.*]] = add <4 x i32> [[TMP3]], <i32 42, i32 42, i32 42, i32 42>
6152; INTERLEAVE-NEXT:    [[TMP6:%.*]] = add <4 x i32> [[TMP0]], [[VEC_IND8]]
6153; INTERLEAVE-NEXT:    [[TMP7:%.*]] = add <4 x i32> [[TMP1]], [[STEP_ADD9]]
6154; INTERLEAVE-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[TMP6]], [[TMP4]]
6155; INTERLEAVE-NEXT:    [[TMP9:%.*]] = add <4 x i32> [[TMP7]], [[TMP5]]
6156; INTERLEAVE-NEXT:    [[TMP10:%.*]] = sext <4 x i32> [[TMP8]] to <4 x i64>
6157; INTERLEAVE-NEXT:    [[TMP11:%.*]] = sext <4 x i32> [[TMP9]] to <4 x i64>
6158; INTERLEAVE-NEXT:    [[TMP12:%.*]] = add <4 x i64> [[VEC_PHI]], [[TMP10]]
6159; INTERLEAVE-NEXT:    [[TMP13:%.*]] = add <4 x i64> [[VEC_PHI2]], [[TMP11]]
6160; INTERLEAVE-NEXT:    [[TMP14:%.*]] = shl <4 x i32> [[VEC_IND12]], <i32 1, i32 1, i32 1, i32 1>
6161; INTERLEAVE-NEXT:    [[STEP_ADD13:%.*]] = shl <4 x i32> [[VEC_IND12]], <i32 1, i32 1, i32 1, i32 1>
6162; INTERLEAVE-NEXT:    [[TMP15:%.*]] = add <4 x i32> [[STEP_ADD13]], <i32 8, i32 8, i32 8, i32 8>
6163; INTERLEAVE-NEXT:    [[TMP16:%.*]] = add <4 x i32> [[TMP4]], [[TMP14]]
6164; INTERLEAVE-NEXT:    [[TMP17:%.*]] = add <4 x i32> [[TMP5]], [[TMP15]]
6165; INTERLEAVE-NEXT:    [[TMP18:%.*]] = sext <4 x i32> [[TMP16]] to <4 x i64>
6166; INTERLEAVE-NEXT:    [[TMP19:%.*]] = sext <4 x i32> [[TMP17]] to <4 x i64>
6167; INTERLEAVE-NEXT:    [[TMP20]] = add <4 x i64> [[TMP12]], [[TMP18]]
6168; INTERLEAVE-NEXT:    [[TMP21]] = add <4 x i64> [[TMP13]], [[TMP19]]
6169; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
6170; INTERLEAVE-NEXT:    [[VEC_IND_NEXT7]] = add <4 x i32> [[VEC_IND4]], <i32 8, i32 8, i32 8, i32 8>
6171; INTERLEAVE-NEXT:    [[VEC_IND_NEXT11]] = add <4 x i32> [[VEC_IND8]], <i32 8, i32 8, i32 8, i32 8>
6172; INTERLEAVE-NEXT:    [[VEC_IND_NEXT15]] = add <4 x i32> [[VEC_IND12]], <i32 8, i32 8, i32 8, i32 8>
6173; INTERLEAVE-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], 112
6174; INTERLEAVE-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
6175; INTERLEAVE:       middle.block:
6176; INTERLEAVE-NEXT:    [[BIN_RDX:%.*]] = add <4 x i64> [[TMP21]], [[TMP20]]
6177; INTERLEAVE-NEXT:    [[TMP23:%.*]] = call i64 @llvm.vector.reduce.add.v4i64(<4 x i64> [[BIN_RDX]])
6178; INTERLEAVE-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD9]], i64 3
6179; INTERLEAVE-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
6180; INTERLEAVE:       scalar.ph:
6181; INTERLEAVE-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 42, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6182; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ]
6183; INTERLEAVE-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 1, [[ENTRY]] ], [ 113, [[MIDDLE_BLOCK]] ]
6184; INTERLEAVE-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
6185; INTERLEAVE-NEXT:    br label [[LOOP:%.*]]
6186; INTERLEAVE:       exit:
6187; INTERLEAVE-NEXT:    [[DOTLCSSA:%.*]] = phi i64 [ [[C23:%.*]], [[LOOP]] ], [ [[TMP23]], [[MIDDLE_BLOCK]] ]
6188; INTERLEAVE-NEXT:    ret i64 [[DOTLCSSA]]
6189; INTERLEAVE:       loop:
6190; INTERLEAVE-NEXT:    [[C5:%.*]] = phi i64 [ [[C23]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
6191; INTERLEAVE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
6192; INTERLEAVE-NEXT:    [[X:%.*]] = phi i32 [ [[C24:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
6193; INTERLEAVE-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[C6:%.*]], [[LOOP]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
6194; INTERLEAVE-NEXT:    [[C6]] = trunc i64 [[INDVARS_IV]] to i32
6195; INTERLEAVE-NEXT:    [[C8:%.*]] = mul i32 [[X]], [[C6]]
6196; INTERLEAVE-NEXT:    [[C9:%.*]] = add i32 [[C8]], 42
6197; INTERLEAVE-NEXT:    [[C10:%.*]] = add i32 [[SCALAR_RECUR]], [[C6]]
6198; INTERLEAVE-NEXT:    [[C11:%.*]] = add i32 [[C10]], [[C9]]
6199; INTERLEAVE-NEXT:    [[C12:%.*]] = sext i32 [[C11]] to i64
6200; INTERLEAVE-NEXT:    [[C13:%.*]] = add i64 [[C5]], [[C12]]
6201; INTERLEAVE-NEXT:    [[INDVARS_IV_TR:%.*]] = trunc i64 [[INDVARS_IV]] to i32
6202; INTERLEAVE-NEXT:    [[C14:%.*]] = shl i32 [[INDVARS_IV_TR]], 1
6203; INTERLEAVE-NEXT:    [[C15:%.*]] = add i32 [[C9]], [[C14]]
6204; INTERLEAVE-NEXT:    [[C16:%.*]] = sext i32 [[C15]] to i64
6205; INTERLEAVE-NEXT:    [[C23]] = add i64 [[C13]], [[C16]]
6206; INTERLEAVE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
6207; INTERLEAVE-NEXT:    [[C24]] = add nuw nsw i32 [[X]], 1
6208; INTERLEAVE-NEXT:    [[EXITCOND_I:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], 114
6209; INTERLEAVE-NEXT:    br i1 [[EXITCOND_I]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP51:![0-9]+]]
6210;
6211entry:
6212  br label %loop
6213
6214exit:                                        ; preds = %loop
6215  %.lcssa = phi i64 [ %c23, %loop ]
6216  ret i64 %.lcssa
6217
6218loop:                                         ; preds = %loop, %entry
6219  %c5 = phi i64 [ %c23, %loop ], [ 0, %entry ]
6220  %indvars.iv = phi i64 [ %indvars.iv.next, %loop ], [ 1, %entry ]
6221  %x = phi i32 [ %c24, %loop ], [ 1, %entry ]
6222  %y = phi i32 [ %c6, %loop ], [ 42, %entry ]
6223  %c6 = trunc i64 %indvars.iv to i32
6224  %c8 = mul i32 %x, %c6
6225  %c9 = add i32 %c8, 42
6226  %c10 = add i32 %y, %c6
6227  %c11 = add i32 %c10, %c9
6228  %c12 = sext i32 %c11 to i64
6229  %c13 = add i64 %c5, %c12
6230  %indvars.iv.tr = trunc i64 %indvars.iv to i32
6231  %c14 = shl i32 %indvars.iv.tr, 1
6232  %c15 = add i32 %c9, %c14
6233  %c16 = sext i32 %c15 to i64
6234  %c23 = add i64 %c13, %c16
6235  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
6236  %c24 = add nuw nsw i32 %x, 1
6237  %exitcond.i = icmp eq i64 %indvars.iv.next, 114
6238  br i1 %exitcond.i, label %exit, label %loop
6239
6240}
6241
6242; Test case for PR52460: a first-order recurrence whose recurring value is the i64 induction variable truncated to i32.
6243define void @pr52460_first_order_recurrence_truncated_iv(i32* noalias %src, i32* %dst) {
6244;
6245; CHECK-LABEL: @pr52460_first_order_recurrence_truncated_iv(
6246; CHECK-NEXT:  entry:
6247; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6248; CHECK:       vector.ph:
6249; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
6250; CHECK:       vector.body:
6251; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6252; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6253; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[VEC_IND2:%.*]], [[VECTOR_BODY]] ]
6254; CHECK-NEXT:    [[VEC_IND2]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
6255; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
6256; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
6257; CHECK-NEXT:    [[TMP1:%.*]] = trunc i64 [[INDEX]] to i32
6258; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 0
6259; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[TMP1]], 1
6260; CHECK-NEXT:    [[TMP4:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> <i32 1, i32 2>
6261; CHECK-NEXT:    [[TMP5:%.*]] = load i32, i32* [[SRC:%.*]], align 4
6262; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP5]], i32 0
6263; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6264; CHECK-NEXT:    [[TMP6:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP4]]
6265; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP2]]
6266; CHECK-NEXT:    [[TMP8:%.*]] = add <2 x i32> [[VEC_IND2]], [[TMP6]]
6267; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr i32, i32* [[TMP7]], i32 0
6268; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <2 x i32>*
6269; CHECK-NEXT:    store <2 x i32> [[TMP8]], <2 x i32>* [[TMP10]], align 4
6270; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
6271; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
6272; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2>
6273; CHECK-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
6274; CHECK-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
6275; CHECK:       middle.block:
6276; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 100, 100
6277; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i32 1
6278; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[VEC_IND2]], i32 0
6279; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6280; CHECK:       scalar.ph:
6281; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6282; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
6283; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
6284; CHECK-NEXT:    br label [[LOOP:%.*]]
6285; CHECK:       loop:
6286; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
6287; CHECK-NEXT:    [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
6288; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
6289; CHECK-NEXT:    [[LV:%.*]] = load i32, i32* [[SRC]], align 4
6290; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[LV]], [[SCALAR_RECUR]]
6291; CHECK-NEXT:    [[TRUNC_IV_NEXT]] = add i32 [[TRUNC_IV]], 1
6292; CHECK-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
6293; CHECK-NEXT:    [[IV_TRUNC]] = trunc i64 [[IV]] to i32
6294; CHECK-NEXT:    [[DST_GEP:%.*]] = getelementptr i32, i32* [[DST]], i32 [[IV_TRUNC]]
6295; CHECK-NEXT:    [[ADD:%.*]] = add i32 [[IV_TRUNC]], [[MUL]]
6296; CHECK-NEXT:    store i32 [[ADD]], i32* [[DST_GEP]], align 4
6297; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100
6298; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
6299; CHECK:       exit:
6300; CHECK-NEXT:    ret void
6301;
6302; IND-LABEL: @pr52460_first_order_recurrence_truncated_iv(
6303; IND-NEXT:  entry:
6304; IND-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6305; IND:       vector.ph:
6306; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
6307; IND:       vector.body:
6308; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6309; IND-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[VEC_IND2:%.*]], [[VECTOR_BODY]] ]
6310; IND-NEXT:    [[VEC_IND2]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
6311; IND-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND2]], <2 x i32> <i32 1, i32 2>
6312; IND-NEXT:    [[TMP1:%.*]] = load i32, i32* [[SRC:%.*]], align 4
6313; IND-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP1]], i64 0
6314; IND-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6315; IND-NEXT:    [[TMP2:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP0]]
6316; IND-NEXT:    [[SEXT:%.*]] = shl i64 [[INDEX]], 32
6317; IND-NEXT:    [[TMP3:%.*]] = ashr exact i64 [[SEXT]], 32
6318; IND-NEXT:    [[TMP4:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[TMP3]]
6319; IND-NEXT:    [[TMP5:%.*]] = add <2 x i32> [[VEC_IND2]], [[TMP2]]
6320; IND-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP4]] to <2 x i32>*
6321; IND-NEXT:    store <2 x i32> [[TMP5]], <2 x i32>* [[TMP6]], align 4
6322; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
6323; IND-NEXT:    [[VEC_IND_NEXT3]] = add <2 x i32> [[VEC_IND2]], <i32 2, i32 2>
6324; IND-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
6325; IND-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
6326; IND:       middle.block:
6327; IND-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
6328; IND:       scalar.ph:
6329; IND-NEXT:    br label [[LOOP:%.*]]
6330; IND:       loop:
6331; IND-NEXT:    br i1 undef, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
6332; IND:       exit:
6333; IND-NEXT:    ret void
6334;
6335; UNROLL-LABEL: @pr52460_first_order_recurrence_truncated_iv(
6336; UNROLL-NEXT:  entry:
6337; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6338; UNROLL:       vector.ph:
6339; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
6340; UNROLL:       vector.body:
6341; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6342; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD4:%.*]], [[VECTOR_BODY]] ]
6343; UNROLL-NEXT:    [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
6344; UNROLL-NEXT:    [[STEP_ADD4]] = add <2 x i32> [[VEC_IND3]], <i32 2, i32 2>
6345; UNROLL-NEXT:    [[TMP0:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> <i32 1, i32 2>
6346; UNROLL-NEXT:    [[TMP1:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD4]], <2 x i32> <i32 1, i32 2>
6347; UNROLL-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SRC:%.*]], align 4
6348; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0
6349; UNROLL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6350; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP2]], i64 0
6351; UNROLL-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT7]], <2 x i32> poison, <2 x i32> zeroinitializer
6352; UNROLL-NEXT:    [[TMP3:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP0]]
6353; UNROLL-NEXT:    [[TMP4:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT8]], [[TMP1]]
6354; UNROLL-NEXT:    [[SEXT:%.*]] = shl i64 [[INDEX]], 32
6355; UNROLL-NEXT:    [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 32
6356; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[TMP5]]
6357; UNROLL-NEXT:    [[TMP7:%.*]] = add <2 x i32> [[VEC_IND3]], [[TMP3]]
6358; UNROLL-NEXT:    [[TMP8:%.*]] = add <2 x i32> [[STEP_ADD4]], [[TMP4]]
6359; UNROLL-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP6]] to <2 x i32>*
6360; UNROLL-NEXT:    store <2 x i32> [[TMP7]], <2 x i32>* [[TMP9]], align 4
6361; UNROLL-NEXT:    [[TMP10:%.*]] = getelementptr i32, i32* [[TMP6]], i64 2
6362; UNROLL-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <2 x i32>*
6363; UNROLL-NEXT:    store <2 x i32> [[TMP8]], <2 x i32>* [[TMP11]], align 4
6364; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
6365; UNROLL-NEXT:    [[VEC_IND_NEXT6]] = add <2 x i32> [[VEC_IND3]], <i32 4, i32 4>
6366; UNROLL-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
6367; UNROLL-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
6368; UNROLL:       middle.block:
6369; UNROLL-NEXT:    br i1 true, label [[EXIT:%.*]], label [[SCALAR_PH]]
6370; UNROLL:       scalar.ph:
6371; UNROLL-NEXT:    br label [[LOOP:%.*]]
6372; UNROLL:       loop:
6373; UNROLL-NEXT:    br i1 undef, label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
6374; UNROLL:       exit:
6375; UNROLL-NEXT:    ret void
6376;
6377; UNROLL-NO-IC-LABEL: @pr52460_first_order_recurrence_truncated_iv(
6378; UNROLL-NO-IC-NEXT:  entry:
6379; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6380; UNROLL-NO-IC:       vector.ph:
6381; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
6382; UNROLL-NO-IC:       vector.body:
6383; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6384; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i64> [ <i64 0, i64 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6385; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD4:%.*]], [[VECTOR_BODY]] ]
6386; UNROLL-NO-IC-NEXT:    [[VEC_IND3:%.*]] = phi <2 x i32> [ <i32 0, i32 1>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
6387; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <2 x i64> [[VEC_IND]], <i64 2, i64 2>
6388; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = trunc i64 [[INDEX]] to i32
6389; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[OFFSET_IDX]], 0
6390; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], 2
6391; UNROLL-NO-IC-NEXT:    [[STEP_ADD4]] = add <2 x i32> [[VEC_IND3]], <i32 2, i32 2>
6392; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = trunc i64 [[INDEX]] to i32
6393; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i32 [[TMP2]], 0
6394; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i32 [[TMP2]], 1
6395; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i32 [[TMP2]], 2
6396; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i32 [[TMP2]], 3
6397; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND3]], <2 x i32> <i32 1, i32 2>
6398; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = shufflevector <2 x i32> [[VEC_IND3]], <2 x i32> [[STEP_ADD4]], <2 x i32> <i32 1, i32 2>
6399; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = load i32, i32* [[SRC:%.*]], align 4
6400; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[TMP9]], i32 0
6401; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6402; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = load i32, i32* [[SRC]], align 4
6403; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <2 x i32> poison, i32 [[TMP10]], i32 0
6404; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <2 x i32> [[BROADCAST_SPLATINSERT7]], <2 x i32> poison, <2 x i32> zeroinitializer
6405; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT]], [[TMP7]]
6406; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = mul nsw <2 x i32> [[BROADCAST_SPLAT8]], [[TMP8]]
6407; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = getelementptr i32, i32* [[DST:%.*]], i32 [[TMP3]]
6408; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = getelementptr i32, i32* [[DST]], i32 [[TMP5]]
6409; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = add <2 x i32> [[VEC_IND3]], [[TMP11]]
6410; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = add <2 x i32> [[STEP_ADD4]], [[TMP12]]
6411; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = getelementptr i32, i32* [[TMP13]], i32 0
6412; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <2 x i32>*
6413; UNROLL-NO-IC-NEXT:    store <2 x i32> [[TMP15]], <2 x i32>* [[TMP18]], align 4
6414; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr i32, i32* [[TMP13]], i32 2
6415; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <2 x i32>*
6416; UNROLL-NO-IC-NEXT:    store <2 x i32> [[TMP16]], <2 x i32>* [[TMP20]], align 4
6417; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
6418; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i64> [[STEP_ADD]], <i64 2, i64 2>
6419; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT6]] = add <2 x i32> [[STEP_ADD4]], <i32 2, i32 2>
6420; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], 100
6421; UNROLL-NO-IC-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
6422; UNROLL-NO-IC:       middle.block:
6423; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 100, 100
6424; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD4]], i32 1
6425; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[STEP_ADD4]], i32 0
6426; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6427; UNROLL-NO-IC:       scalar.ph:
6428; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6429; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
6430; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 100, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
6431; UNROLL-NO-IC-NEXT:    br label [[LOOP:%.*]]
6432; UNROLL-NO-IC:       loop:
6433; UNROLL-NO-IC-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
6434; UNROLL-NO-IC-NEXT:    [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
6435; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
6436; UNROLL-NO-IC-NEXT:    [[LV:%.*]] = load i32, i32* [[SRC]], align 4
6437; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = mul nsw i32 [[LV]], [[SCALAR_RECUR]]
6438; UNROLL-NO-IC-NEXT:    [[TRUNC_IV_NEXT]] = add i32 [[TRUNC_IV]], 1
6439; UNROLL-NO-IC-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
6440; UNROLL-NO-IC-NEXT:    [[IV_TRUNC]] = trunc i64 [[IV]] to i32
6441; UNROLL-NO-IC-NEXT:    [[DST_GEP:%.*]] = getelementptr i32, i32* [[DST]], i32 [[IV_TRUNC]]
6442; UNROLL-NO-IC-NEXT:    [[ADD:%.*]] = add i32 [[IV_TRUNC]], [[MUL]]
6443; UNROLL-NO-IC-NEXT:    store i32 [[ADD]], i32* [[DST_GEP]], align 4
6444; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100
6445; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
6446; UNROLL-NO-IC:       exit:
6447; UNROLL-NO-IC-NEXT:    ret void
6448;
6449; INTERLEAVE-LABEL: @pr52460_first_order_recurrence_truncated_iv(
6450; INTERLEAVE-NEXT:  entry:
6451; INTERLEAVE-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
6452; INTERLEAVE:       vector.ph:
6453; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
6454; INTERLEAVE:       vector.body:
6455; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6456; INTERLEAVE-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD4:%.*]], [[VECTOR_BODY]] ]
6457; INTERLEAVE-NEXT:    [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[VECTOR_BODY]] ]
6458; INTERLEAVE-NEXT:    [[STEP_ADD4]] = add <4 x i32> [[VEC_IND3]], <i32 4, i32 4, i32 4, i32 4>
6459; INTERLEAVE-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
6460; INTERLEAVE-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VEC_IND3]], <4 x i32> [[STEP_ADD4]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
6461; INTERLEAVE-NEXT:    [[TMP2:%.*]] = load i32, i32* [[SRC:%.*]], align 4
6462; INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
6463; INTERLEAVE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
6464; INTERLEAVE-NEXT:    [[BROADCAST_SPLATINSERT7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP2]], i64 0
6465; INTERLEAVE-NEXT:    [[BROADCAST_SPLAT8:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT7]], <4 x i32> poison, <4 x i32> zeroinitializer
6466; INTERLEAVE-NEXT:    [[TMP3:%.*]] = mul nsw <4 x i32> [[BROADCAST_SPLAT]], [[TMP0]]
6467; INTERLEAVE-NEXT:    [[TMP4:%.*]] = mul nsw <4 x i32> [[BROADCAST_SPLAT8]], [[TMP1]]
6468; INTERLEAVE-NEXT:    [[SEXT:%.*]] = shl i64 [[INDEX]], 32
6469; INTERLEAVE-NEXT:    [[TMP5:%.*]] = ashr exact i64 [[SEXT]], 32
6470; INTERLEAVE-NEXT:    [[TMP6:%.*]] = getelementptr i32, i32* [[DST:%.*]], i64 [[TMP5]]
6471; INTERLEAVE-NEXT:    [[TMP7:%.*]] = add <4 x i32> [[VEC_IND3]], [[TMP3]]
6472; INTERLEAVE-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[STEP_ADD4]], [[TMP4]]
6473; INTERLEAVE-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
6474; INTERLEAVE-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP9]], align 4
6475; INTERLEAVE-NEXT:    [[TMP10:%.*]] = getelementptr i32, i32* [[TMP6]], i64 4
6476; INTERLEAVE-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
6477; INTERLEAVE-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* [[TMP11]], align 4
6478; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
6479; INTERLEAVE-NEXT:    [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND3]], <i32 8, i32 8, i32 8, i32 8>
6480; INTERLEAVE-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], 96
6481; INTERLEAVE-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP52:![0-9]+]]
6482; INTERLEAVE:       middle.block:
6483; INTERLEAVE-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD4]], i64 3
6484; INTERLEAVE-NEXT:    br i1 false, label [[EXIT:%.*]], label [[SCALAR_PH]]
6485; INTERLEAVE:       scalar.ph:
6486; INTERLEAVE-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6487; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ 96, [[MIDDLE_BLOCK]] ]
6488; INTERLEAVE-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ 96, [[MIDDLE_BLOCK]] ]
6489; INTERLEAVE-NEXT:    br label [[LOOP:%.*]]
6490; INTERLEAVE:       loop:
6491; INTERLEAVE-NEXT:    [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
6492; INTERLEAVE-NEXT:    [[TRUNC_IV:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[TRUNC_IV_NEXT:%.*]], [[LOOP]] ]
6493; INTERLEAVE-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_TRUNC:%.*]], [[LOOP]] ]
6494; INTERLEAVE-NEXT:    [[LV:%.*]] = load i32, i32* [[SRC]], align 4
6495; INTERLEAVE-NEXT:    [[MUL:%.*]] = mul nsw i32 [[LV]], [[SCALAR_RECUR]]
6496; INTERLEAVE-NEXT:    [[TRUNC_IV_NEXT]] = add i32 [[TRUNC_IV]], 1
6497; INTERLEAVE-NEXT:    [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1
6498; INTERLEAVE-NEXT:    [[IV_TRUNC]] = trunc i64 [[IV]] to i32
6499; INTERLEAVE-NEXT:    [[SEXT9:%.*]] = shl i64 [[IV]], 32
6500; INTERLEAVE-NEXT:    [[TMP13:%.*]] = ashr exact i64 [[SEXT9]], 32
6501; INTERLEAVE-NEXT:    [[DST_GEP:%.*]] = getelementptr i32, i32* [[DST]], i64 [[TMP13]]
6502; INTERLEAVE-NEXT:    [[ADD:%.*]] = add i32 [[MUL]], [[IV_TRUNC]]
6503; INTERLEAVE-NEXT:    store i32 [[ADD]], i32* [[DST_GEP]], align 4
6504; INTERLEAVE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[TRUNC_IV_NEXT]], 100
6505; INTERLEAVE-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP53:![0-9]+]]
6506; INTERLEAVE:       exit:
6507; INTERLEAVE-NEXT:    ret void
6508;
6509entry:
6510  br label %loop
6511
6512loop:
6513  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
6514  %trunc.iv = phi i32 [ 0, %entry ], [ %trunc.iv.next, %loop ]
6515  %recur = phi i32 [ 0, %entry ], [ %iv.trunc, %loop ]
6516  %lv = load i32, i32* %src, align 4
6517  %mul = mul nsw i32 %lv, %recur
6518  %trunc.iv.next  = add i32 %trunc.iv, 1
6519  %iv.next = add nuw nsw i64 %iv, 1
6520  %iv.trunc = trunc i64 %iv to i32
6521  %dst.gep = getelementptr i32, i32* %dst, i32 %iv.trunc
6522  %add = add i32 %iv.trunc, %mul
6523  store i32 %add, i32* %dst.gep
6524  %exitcond = icmp eq i32 %trunc.iv.next, 100
6525  br i1 %exitcond, label %exit, label %loop
6526
6527exit:
6528  ret void
6529}
6530
; Test case where %iv.2.ext and %iv.2.conv (a shl/ashr-by-24 pair on %iv.2)
; become redundant due to the SCEV predicates generated for the vector loop.
; Both instructions should be removed from the vector loop; the scalar loop
; still keeps them.
6534define void @test_optimized_cast_induction_feeding_first_order_recurrence(i64 %n, i32 %step, i32* %ptr) {
6535;
6536; CHECK-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
6537; CHECK-NEXT:  entry:
6538; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
6539; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
6540; CHECK:       vector.scevcheck:
6541; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
6542; CHECK-NEXT:    [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
6543; CHECK-NEXT:    [[TMP2:%.*]] = sub i8 0, [[TMP1]]
6544; CHECK-NEXT:    [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0
6545; CHECK-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]]
6546; CHECK-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8
6547; CHECK-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
6548; CHECK-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
6549; CHECK-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
6550; CHECK-NEXT:    [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
6551; CHECK-NEXT:    [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
6552; CHECK-NEXT:    [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0
6553; CHECK-NEXT:    [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0
6554; CHECK-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
6555; CHECK-NEXT:    [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
6556; CHECK-NEXT:    [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
6557; CHECK-NEXT:    [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
6558; CHECK-NEXT:    [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]]
6559; CHECK-NEXT:    [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]]
6560; CHECK-NEXT:    [[TMP17:%.*]] = sext i8 [[TMP1]] to i32
6561; CHECK-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP17]]
6562; CHECK-NEXT:    [[TMP18:%.*]] = or i1 [[TMP15]], [[IDENT_CHECK]]
6563; CHECK-NEXT:    br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
6564; CHECK:       vector.ph:
6565; CHECK-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
6566; CHECK-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
6567; CHECK-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
6568; CHECK-NEXT:    [[IND_END:%.*]] = mul i32 [[CAST_CRD]], [[STEP]]
6569; CHECK-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i32 0
6570; CHECK-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6571; CHECK-NEXT:    [[TMP19:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
6572; CHECK-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP19]]
6573; CHECK-NEXT:    [[TMP20:%.*]] = mul i32 [[STEP]], 2
6574; CHECK-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP20]], i32 0
6575; CHECK-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
6576; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
6577; CHECK:       vector.body:
6578; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6579; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ]
6580; CHECK-NEXT:    [[VEC_IND]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6581; CHECK-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX]], 0
6582; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
6583; CHECK-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP21]]
6584; CHECK-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0
6585; CHECK-NEXT:    [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <2 x i32>*
6586; CHECK-NEXT:    store <2 x i32> [[TMP22]], <2 x i32>* [[TMP25]], align 4
6587; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
6588; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
6589; CHECK-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6590; CHECK-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
6591; CHECK:       middle.block:
6592; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
6593; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 1
6594; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[VEC_IND]], i32 0
6595; CHECK-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6596; CHECK:       scalar.ph:
6597; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6598; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
6599; CHECK-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
6600; CHECK-NEXT:    br label [[LOOP:%.*]]
6601; CHECK:       loop:
6602; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
6603; CHECK-NEXT:    [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
6604; CHECK-NEXT:    [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
6605; CHECK-NEXT:    [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24
6606; CHECK-NEXT:    [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24
6607; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]]
6608; CHECK-NEXT:    store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4
6609; CHECK-NEXT:    [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
6610; CHECK-NEXT:    [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
6611; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
6612; CHECK-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
6613; CHECK:       exit:
6614; CHECK-NEXT:    ret void
6615;
6616; IND-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
6617; IND-NEXT:  entry:
6618; IND-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
6619; IND-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
6620; IND:       vector.scevcheck:
6621; IND-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
6622; IND-NEXT:    [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
6623; IND-NEXT:    [[TMP2:%.*]] = sub i8 0, [[TMP1]]
6624; IND-NEXT:    [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0
6625; IND-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]]
6626; IND-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8
6627; IND-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
6628; IND-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
6629; IND-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
6630; IND-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
6631; IND-NEXT:    [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
6632; IND-NEXT:    [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]]
6633; IND-NEXT:    [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
6634; IND-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
6635; IND-NEXT:    [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
6636; IND-NEXT:    [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
6637; IND-NEXT:    [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW]]
6638; IND-NEXT:    [[TMP14:%.*]] = add i32 [[STEP]], -128
6639; IND-NEXT:    [[TMP15:%.*]] = icmp ult i32 [[TMP14]], -256
6640; IND-NEXT:    [[TMP16:%.*]] = or i1 [[TMP13]], [[TMP15]]
6641; IND-NEXT:    br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
6642; IND:       vector.ph:
6643; IND-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -2
6644; IND-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
6645; IND-NEXT:    [[IND_END:%.*]] = mul i32 [[CAST_CRD]], [[STEP]]
6646; IND-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
6647; IND-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6648; IND-NEXT:    [[TMP17:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
6649; IND-NEXT:    [[TMP18:%.*]] = shl i32 [[STEP]], 1
6650; IND-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i64 0
6651; IND-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
6652; IND-NEXT:    br label [[VECTOR_BODY:%.*]]
6653; IND:       vector.body:
6654; IND-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6655; IND-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[VEC_IND:%.*]], [[VECTOR_BODY]] ]
6656; IND-NEXT:    [[VEC_IND]] = phi <2 x i32> [ [[TMP17]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6657; IND-NEXT:    [[TMP19:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
6658; IND-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[INDEX]]
6659; IND-NEXT:    [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <2 x i32>*
6660; IND-NEXT:    store <2 x i32> [[TMP19]], <2 x i32>* [[TMP21]], align 4
6661; IND-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
6662; IND-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
6663; IND-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6664; IND-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
6665; IND:       middle.block:
6666; IND-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
6667; IND-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[VEC_IND]], i64 1
6668; IND-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6669; IND:       scalar.ph:
6670; IND-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6671; IND-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
6672; IND-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
6673; IND-NEXT:    br label [[LOOP:%.*]]
6674; IND:       loop:
6675; IND-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
6676; IND-NEXT:    [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
6677; IND-NEXT:    [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
6678; IND-NEXT:    [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24
6679; IND-NEXT:    [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24
6680; IND-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]]
6681; IND-NEXT:    store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4
6682; IND-NEXT:    [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
6683; IND-NEXT:    [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
6684; IND-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
6685; IND-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
6686; IND:       exit:
6687; IND-NEXT:    ret void
6688;
6689; UNROLL-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
6690; UNROLL-NEXT:  entry:
6691; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
6692; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
6693; UNROLL:       vector.scevcheck:
6694; UNROLL-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
6695; UNROLL-NEXT:    [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
6696; UNROLL-NEXT:    [[TMP2:%.*]] = sub i8 0, [[TMP1]]
6697; UNROLL-NEXT:    [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0
6698; UNROLL-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]]
6699; UNROLL-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8
6700; UNROLL-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
6701; UNROLL-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
6702; UNROLL-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
6703; UNROLL-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
6704; UNROLL-NEXT:    [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
6705; UNROLL-NEXT:    [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]]
6706; UNROLL-NEXT:    [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
6707; UNROLL-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
6708; UNROLL-NEXT:    [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
6709; UNROLL-NEXT:    [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
6710; UNROLL-NEXT:    [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW]]
6711; UNROLL-NEXT:    [[TMP14:%.*]] = add i32 [[STEP]], -128
6712; UNROLL-NEXT:    [[TMP15:%.*]] = icmp ult i32 [[TMP14]], -256
6713; UNROLL-NEXT:    [[TMP16:%.*]] = or i1 [[TMP13]], [[TMP15]]
6714; UNROLL-NEXT:    br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
6715; UNROLL:       vector.ph:
6716; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -4
6717; UNROLL-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
6718; UNROLL-NEXT:    [[IND_END:%.*]] = mul i32 [[CAST_CRD]], [[STEP]]
6719; UNROLL-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i64 0
6720; UNROLL-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6721; UNROLL-NEXT:    [[TMP17:%.*]] = mul nuw <2 x i32> [[DOTSPLAT]], <i32 0, i32 1>
6722; UNROLL-NEXT:    [[TMP18:%.*]] = shl i32 [[STEP]], 1
6723; UNROLL-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP18]], i64 0
6724; UNROLL-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
6725; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
6726; UNROLL:       vector.body:
6727; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6728; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
6729; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[TMP17]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6730; UNROLL-NEXT:    [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
6731; UNROLL-NEXT:    [[TMP19:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
6732; UNROLL-NEXT:    [[TMP20:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2>
6733; UNROLL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[INDEX]]
6734; UNROLL-NEXT:    [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <2 x i32>*
6735; UNROLL-NEXT:    store <2 x i32> [[TMP19]], <2 x i32>* [[TMP22]], align 4
6736; UNROLL-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i64 2
6737; UNROLL-NEXT:    [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <2 x i32>*
6738; UNROLL-NEXT:    store <2 x i32> [[TMP20]], <2 x i32>* [[TMP24]], align 4
6739; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
6740; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[DOTSPLAT3]]
6741; UNROLL-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6742; UNROLL-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
6743; UNROLL:       middle.block:
6744; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
6745; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i64 1
6746; UNROLL-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6747; UNROLL:       scalar.ph:
6748; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6749; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
6750; UNROLL-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
6751; UNROLL-NEXT:    br label [[LOOP:%.*]]
6752; UNROLL:       loop:
6753; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
6754; UNROLL-NEXT:    [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
6755; UNROLL-NEXT:    [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
6756; UNROLL-NEXT:    [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24
6757; UNROLL-NEXT:    [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24
6758; UNROLL-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]]
6759; UNROLL-NEXT:    store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4
6760; UNROLL-NEXT:    [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
6761; UNROLL-NEXT:    [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
6762; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
6763; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
6764; UNROLL:       exit:
6765; UNROLL-NEXT:    ret void
6766;
6767; UNROLL-NO-IC-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
6768; UNROLL-NO-IC-NEXT:  entry:
6769; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
6770; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
6771; UNROLL-NO-IC:       vector.scevcheck:
6772; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
6773; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
6774; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = sub i8 0, [[TMP1]]
6775; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0
6776; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]]
6777; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8
6778; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
6779; UNROLL-NO-IC-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
6780; UNROLL-NO-IC-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
6781; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i8 0, [[MUL_RESULT]]
6782; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = sub i8 0, [[MUL_RESULT]]
6783; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = icmp sgt i8 [[TMP7]], 0
6784; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = icmp slt i8 [[TMP6]], 0
6785; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = select i1 [[TMP3]], i1 [[TMP8]], i1 [[TMP9]]
6786; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = icmp ugt i64 [[TMP0]], 255
6787; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = icmp ne i8 [[TMP1]], 0
6788; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = and i1 [[TMP11]], [[TMP12]]
6789; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = or i1 [[TMP10]], [[TMP13]]
6790; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = or i1 [[TMP14]], [[MUL_OVERFLOW]]
6791; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = sext i8 [[TMP1]] to i32
6792; UNROLL-NO-IC-NEXT:    [[IDENT_CHECK:%.*]] = icmp ne i32 [[STEP]], [[TMP17]]
6793; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = or i1 [[TMP15]], [[IDENT_CHECK]]
6794; UNROLL-NO-IC-NEXT:    br i1 [[TMP18]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
6795; UNROLL-NO-IC:       vector.ph:
6796; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
6797; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
6798; UNROLL-NO-IC-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
6799; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = mul i32 [[CAST_CRD]], [[STEP]]
6800; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x i32> poison, i32 [[STEP]], i32 0
6801; UNROLL-NO-IC-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT]], <2 x i32> poison, <2 x i32> zeroinitializer
6802; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = mul <2 x i32> <i32 0, i32 1>, [[DOTSPLAT]]
6803; UNROLL-NO-IC-NEXT:    [[INDUCTION:%.*]] = add <2 x i32> zeroinitializer, [[TMP19]]
6804; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = mul i32 [[STEP]], 2
6805; UNROLL-NO-IC-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <2 x i32> poison, i32 [[TMP20]], i32 0
6806; UNROLL-NO-IC-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <2 x i32> [[DOTSPLATINSERT2]], <2 x i32> poison, <2 x i32> zeroinitializer
6807; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
6808; UNROLL-NO-IC:       vector.body:
6809; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6810; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <2 x i32> [ <i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
6811; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <2 x i32> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6812; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = add i64 [[INDEX]], 0
6813; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = add i64 [[INDEX]], 2
6814; UNROLL-NO-IC-NEXT:    [[STEP_ADD]] = add <2 x i32> [[VEC_IND]], [[DOTSPLAT3]]
6815; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = shufflevector <2 x i32> [[VECTOR_RECUR]], <2 x i32> [[VEC_IND]], <2 x i32> <i32 1, i32 2>
6816; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = shufflevector <2 x i32> [[VEC_IND]], <2 x i32> [[STEP_ADD]], <2 x i32> <i32 1, i32 2>
6817; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[TMP21]]
6818; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[TMP22]]
6819; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 0
6820; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = bitcast i32* [[TMP27]] to <2 x i32>*
6821; UNROLL-NO-IC-NEXT:    store <2 x i32> [[TMP23]], <2 x i32>* [[TMP28]], align 4
6822; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 2
6823; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = bitcast i32* [[TMP29]] to <2 x i32>*
6824; UNROLL-NO-IC-NEXT:    store <2 x i32> [[TMP24]], <2 x i32>* [[TMP30]], align 4
6825; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
6826; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <2 x i32> [[STEP_ADD]], [[DOTSPLAT3]]
6827; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6828; UNROLL-NO-IC-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
6829; UNROLL-NO-IC:       middle.block:
6830; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
6831; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 1
6832; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <2 x i32> [[STEP_ADD]], i32 0
6833; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6834; UNROLL-NO-IC:       scalar.ph:
6835; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6836; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
6837; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_SCEVCHECK]] ]
6838; UNROLL-NO-IC-NEXT:    br label [[LOOP:%.*]]
6839; UNROLL-NO-IC:       loop:
6840; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
6841; UNROLL-NO-IC-NEXT:    [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
6842; UNROLL-NO-IC-NEXT:    [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
6843; UNROLL-NO-IC-NEXT:    [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24
6844; UNROLL-NO-IC-NEXT:    [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24
6845; UNROLL-NO-IC-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]]
6846; UNROLL-NO-IC-NEXT:    store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4
6847; UNROLL-NO-IC-NEXT:    [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
6848; UNROLL-NO-IC-NEXT:    [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
6849; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
6850; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
6851; UNROLL-NO-IC:       exit:
6852; UNROLL-NO-IC-NEXT:    ret void
6853;
6854; INTERLEAVE-LABEL: @test_optimized_cast_induction_feeding_first_order_recurrence(
6855; INTERLEAVE-NEXT:  entry:
6856; INTERLEAVE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
6857; INTERLEAVE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_SCEVCHECK:%.*]]
6858; INTERLEAVE:       vector.scevcheck:
6859; INTERLEAVE-NEXT:    [[TMP0:%.*]] = add i64 [[N]], -1
6860; INTERLEAVE-NEXT:    [[TMP1:%.*]] = trunc i32 [[STEP:%.*]] to i8
6861; INTERLEAVE-NEXT:    [[TMP2:%.*]] = sub i8 0, [[TMP1]]
6862; INTERLEAVE-NEXT:    [[TMP3:%.*]] = icmp slt i8 [[TMP1]], 0
6863; INTERLEAVE-NEXT:    [[TMP4:%.*]] = select i1 [[TMP3]], i8 [[TMP2]], i8 [[TMP1]]
6864; INTERLEAVE-NEXT:    [[TMP5:%.*]] = trunc i64 [[TMP0]] to i8
6865; INTERLEAVE-NEXT:    [[MUL:%.*]] = call { i8, i1 } @llvm.umul.with.overflow.i8(i8 [[TMP4]], i8 [[TMP5]])
6866; INTERLEAVE-NEXT:    [[MUL_RESULT:%.*]] = extractvalue { i8, i1 } [[MUL]], 0
6867; INTERLEAVE-NEXT:    [[MUL_OVERFLOW:%.*]] = extractvalue { i8, i1 } [[MUL]], 1
6868; INTERLEAVE-NEXT:    [[TMP6:%.*]] = icmp ugt i8 [[MUL_RESULT]], -128
6869; INTERLEAVE-NEXT:    [[TMP7:%.*]] = icmp slt i8 [[MUL_RESULT]], 0
6870; INTERLEAVE-NEXT:    [[TMP8:%.*]] = select i1 [[TMP3]], i1 [[TMP6]], i1 [[TMP7]]
6871; INTERLEAVE-NEXT:    [[TMP9:%.*]] = icmp ugt i64 [[TMP0]], 255
6872; INTERLEAVE-NEXT:    [[TMP10:%.*]] = icmp ne i8 [[TMP1]], 0
6873; INTERLEAVE-NEXT:    [[TMP11:%.*]] = and i1 [[TMP9]], [[TMP10]]
6874; INTERLEAVE-NEXT:    [[TMP12:%.*]] = or i1 [[TMP8]], [[TMP11]]
6875; INTERLEAVE-NEXT:    [[TMP13:%.*]] = or i1 [[TMP12]], [[MUL_OVERFLOW]]
6876; INTERLEAVE-NEXT:    [[TMP14:%.*]] = add i32 [[STEP]], -128
6877; INTERLEAVE-NEXT:    [[TMP15:%.*]] = icmp ult i32 [[TMP14]], -256
6878; INTERLEAVE-NEXT:    [[TMP16:%.*]] = or i1 [[TMP13]], [[TMP15]]
6879; INTERLEAVE-NEXT:    br i1 [[TMP16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
6880; INTERLEAVE:       vector.ph:
6881; INTERLEAVE-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -8
6882; INTERLEAVE-NEXT:    [[CAST_CRD:%.*]] = trunc i64 [[N_VEC]] to i32
6883; INTERLEAVE-NEXT:    [[IND_END:%.*]] = mul i32 [[CAST_CRD]], [[STEP]]
6884; INTERLEAVE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[STEP]], i64 0
6885; INTERLEAVE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
6886; INTERLEAVE-NEXT:    [[TMP17:%.*]] = mul <4 x i32> [[DOTSPLAT]], <i32 0, i32 1, i32 2, i32 3>
6887; INTERLEAVE-NEXT:    [[TMP18:%.*]] = shl i32 [[STEP]], 2
6888; INTERLEAVE-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[TMP18]], i64 0
6889; INTERLEAVE-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <4 x i32> [[DOTSPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
6890; INTERLEAVE-NEXT:    br label [[VECTOR_BODY:%.*]]
6891; INTERLEAVE:       vector.body:
6892; INTERLEAVE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
6893; INTERLEAVE-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[STEP_ADD:%.*]], [[VECTOR_BODY]] ]
6894; INTERLEAVE-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ [[TMP17]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
6895; INTERLEAVE-NEXT:    [[STEP_ADD]] = add <4 x i32> [[VEC_IND]], [[DOTSPLAT3]]
6896; INTERLEAVE-NEXT:    [[TMP19:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[VEC_IND]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
6897; INTERLEAVE-NEXT:    [[TMP20:%.*]] = shufflevector <4 x i32> [[VEC_IND]], <4 x i32> [[STEP_ADD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
6898; INTERLEAVE-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[PTR:%.*]], i64 [[INDEX]]
6899; INTERLEAVE-NEXT:    [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
6900; INTERLEAVE-NEXT:    store <4 x i32> [[TMP19]], <4 x i32>* [[TMP22]], align 4
6901; INTERLEAVE-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i64 4
6902; INTERLEAVE-NEXT:    [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
6903; INTERLEAVE-NEXT:    store <4 x i32> [[TMP20]], <4 x i32>* [[TMP24]], align 4
6904; INTERLEAVE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
6905; INTERLEAVE-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], [[DOTSPLAT3]]
6906; INTERLEAVE-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
6907; INTERLEAVE-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP54:![0-9]+]]
6908; INTERLEAVE:       middle.block:
6909; INTERLEAVE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
6910; INTERLEAVE-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[STEP_ADD]], i64 3
6911; INTERLEAVE-NEXT:    br i1 [[CMP_N]], label [[EXIT:%.*]], label [[SCALAR_PH]]
6912; INTERLEAVE:       scalar.ph:
6913; INTERLEAVE-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
6914; INTERLEAVE-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
6915; INTERLEAVE-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 0, [[VECTOR_SCEVCHECK]] ], [ 0, [[ENTRY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
6916; INTERLEAVE-NEXT:    br label [[LOOP:%.*]]
6917; INTERLEAVE:       loop:
6918; INTERLEAVE-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[IV_2_CONV:%.*]], [[LOOP]] ]
6919; INTERLEAVE-NEXT:    [[IV_1:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_1_NEXT:%.*]], [[LOOP]] ]
6920; INTERLEAVE-NEXT:    [[IV_2:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[IV_2_NEXT:%.*]], [[LOOP]] ]
6921; INTERLEAVE-NEXT:    [[IV_2_EXT:%.*]] = shl i32 [[IV_2]], 24
6922; INTERLEAVE-NEXT:    [[IV_2_CONV]] = ashr exact i32 [[IV_2_EXT]], 24
6923; INTERLEAVE-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, i32* [[PTR]], i64 [[IV_1]]
6924; INTERLEAVE-NEXT:    store i32 [[SCALAR_RECUR]], i32* [[GEP]], align 4
6925; INTERLEAVE-NEXT:    [[IV_2_NEXT]] = add nsw i32 [[IV_2_CONV]], [[STEP]]
6926; INTERLEAVE-NEXT:    [[IV_1_NEXT]] = add nuw nsw i64 [[IV_1]], 1
6927; INTERLEAVE-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_1_NEXT]], [[N]]
6928; INTERLEAVE-NEXT:    br i1 [[EXITCOND]], label [[EXIT]], label [[LOOP]], !llvm.loop [[LOOP55:![0-9]+]]
6929; INTERLEAVE:       exit:
6930; INTERLEAVE-NEXT:    ret void
6931;
6932entry:
6933  br label %loop
6934
6935loop:
6936  %for = phi i32 [ 0, %entry ], [ %iv.2.conv, %loop ]
6937  %iv.1 = phi i64 [ 0, %entry ], [ %iv.1.next, %loop ]
6938  %iv.2 = phi i32 [ 0, %entry ], [ %iv.2.next, %loop ]
6939  %iv.2.ext = shl i32 %iv.2, 24
6940  %iv.2.conv = ashr exact i32 %iv.2.ext, 24
6941  %gep = getelementptr inbounds i32, i32* %ptr, i64 %iv.1
6942  store i32 %for, i32* %gep, align 4
6943  %iv.2.next = add nsw i32 %iv.2.conv, %step
6944  %iv.1.next = add nuw nsw i64 %iv.1, 1
6945  %exitcond = icmp eq i64 %iv.1.next, %n
6946  br i1 %exitcond, label %exit, label %loop
6947
6948exit:
6949  ret void
6950}
6951