1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
3; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s --check-prefix=UNROLL
4; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
5; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S | FileCheck %s --check-prefix=UNROLL-NO-VF
6; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s --check-prefix=SINK-AFTER
7
8target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
9
10; void recurrence_1(int *a, int *b, int n) {
11;   for(int i = 0; i < n; i++)
12;     b[i] = a[i] + a[i - 1];
13; }
14;
15;
16;
17define void @recurrence_1(i32* nocapture readonly %a, i32* nocapture %b, i32 %n) {
; Test input for the loop vectorizer: the phi %0 in scalar.body carries the
; value that was loaded from %a by the previous iteration, so each store to %b
; combines the current load with the preceding one.
; The assertion lines that follow are autogenerated (see the NOTE on the first
; line of this file); each group corresponds to one RUN line at the top.
18; CHECK-LABEL: @recurrence_1(
19; CHECK-NEXT:  entry:
20; CHECK-NEXT:    br label [[FOR_PREHEADER:%.*]]
21; CHECK:       for.preheader:
22; CHECK-NEXT:    [[PRE_LOAD:%.*]] = load i32, i32* [[A:%.*]], align 4
23; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
24; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
25; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
26; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
27; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
28; CHECK:       vector.memcheck:
29; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[N]], -1
30; CHECK-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
31; CHECK-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
32; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP5]]
33; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[A]], i64 1
34; CHECK-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 2
35; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP6]]
36; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP5]], [[B]]
37; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i32* [[SCEVGEP3]], [[SCEVGEP]]
38; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
39; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
40; CHECK:       vector.ph:
41; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
42; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i64 3
43; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
44; CHECK:       vector.body:
45; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
46; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
47; CHECK-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 1
48; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
49; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
50; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
51; CHECK-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
52; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
53; CHECK-NEXT:    [[TMP12:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP10]]
54; CHECK-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
55; CHECK-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* [[TMP13]], align 4
56; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
57; CHECK-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
58; CHECK-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
59; CHECK:       middle.block:
60; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
61; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3
62; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
63; CHECK:       scalar.ph:
64; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
65; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
66; CHECK-NEXT:    br label [[SCALAR_BODY:%.*]]
67; CHECK:       scalar.body:
68; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[SCALAR_BODY]] ]
69; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
70; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
71; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
72; CHECK-NEXT:    [[TMP15]] = load i32, i32* [[ARRAYIDX32]], align 4
73; CHECK-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
74; CHECK-NEXT:    [[ADD35:%.*]] = add i32 [[TMP15]], [[SCALAR_RECUR]]
75; CHECK-NEXT:    store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
76; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
77; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
78; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
79; CHECK:       for.exit:
80; CHECK-NEXT:    ret void
81;
82; UNROLL-LABEL: @recurrence_1(
83; UNROLL-NEXT:  entry:
84; UNROLL-NEXT:    br label [[FOR_PREHEADER:%.*]]
85; UNROLL:       for.preheader:
86; UNROLL-NEXT:    [[PRE_LOAD:%.*]] = load i32, i32* [[A:%.*]], align 4
87; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
88; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
89; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
90; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
91; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
92; UNROLL:       vector.memcheck:
93; UNROLL-NEXT:    [[TMP3:%.*]] = add i32 [[N]], -1
94; UNROLL-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
95; UNROLL-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
96; UNROLL-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[TMP5]]
97; UNROLL-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[A]], i64 1
98; UNROLL-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 2
99; UNROLL-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP6]]
100; UNROLL-NEXT:    [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP5]], [[B]]
101; UNROLL-NEXT:    [[BOUND1:%.*]] = icmp ult i32* [[SCEVGEP3]], [[SCEVGEP]]
102; UNROLL-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
103; UNROLL-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
104; UNROLL:       vector.ph:
105; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
106; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i64 3
107; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
108; UNROLL:       vector.body:
109; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
110; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ]
111; UNROLL-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 1
112; UNROLL-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
113; UNROLL-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
114; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP9]], align 4
115; UNROLL-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i64 4
116; UNROLL-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
117; UNROLL-NEXT:    [[WIDE_LOAD7]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4
118; UNROLL-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
119; UNROLL-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
120; UNROLL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
121; UNROLL-NEXT:    [[TMP15:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP12]]
122; UNROLL-NEXT:    [[TMP16:%.*]] = add <4 x i32> [[WIDE_LOAD7]], [[TMP13]]
123; UNROLL-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>*
124; UNROLL-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* [[TMP17]], align 4
125; UNROLL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP14]], i64 4
126; UNROLL-NEXT:    [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
127; UNROLL-NEXT:    store <4 x i32> [[TMP16]], <4 x i32>* [[TMP19]], align 4
128; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
129; UNROLL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
130; UNROLL-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
131; UNROLL:       middle.block:
132; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
133; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD7]], i64 3
134; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
135; UNROLL:       scalar.ph:
136; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
137; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
138; UNROLL-NEXT:    br label [[SCALAR_BODY:%.*]]
139; UNROLL:       scalar.body:
140; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[SCALAR_BODY]] ]
141; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
142; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
143; UNROLL-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
144; UNROLL-NEXT:    [[TMP21]] = load i32, i32* [[ARRAYIDX32]], align 4
145; UNROLL-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
146; UNROLL-NEXT:    [[ADD35:%.*]] = add i32 [[TMP21]], [[SCALAR_RECUR]]
147; UNROLL-NEXT:    store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
148; UNROLL-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
149; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
150; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
151; UNROLL:       for.exit:
152; UNROLL-NEXT:    ret void
153;
154; UNROLL-NO-IC-LABEL: @recurrence_1(
155; UNROLL-NO-IC-NEXT:  entry:
156; UNROLL-NO-IC-NEXT:    [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
157; UNROLL-NO-IC-NEXT:    br label [[FOR_PREHEADER:%.*]]
158; UNROLL-NO-IC:       for.preheader:
159; UNROLL-NO-IC-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
160; UNROLL-NO-IC-NEXT:    [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
161; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
162; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
163; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
164; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
165; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
166; UNROLL-NO-IC:       vector.memcheck:
167; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i32 [[N]], -1
168; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
169; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
170; UNROLL-NO-IC-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP5]]
171; UNROLL-NO-IC-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
172; UNROLL-NO-IC-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[A]], i64 1
173; UNROLL-NO-IC-NEXT:    [[SCEVGEP34:%.*]] = bitcast i32* [[SCEVGEP3]] to i8*
174; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 2
175; UNROLL-NO-IC-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP6]]
176; UNROLL-NO-IC-NEXT:    [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8*
177; UNROLL-NO-IC-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
178; UNROLL-NO-IC-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
179; UNROLL-NO-IC-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
180; UNROLL-NO-IC-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
181; UNROLL-NO-IC:       vector.ph:
182; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
183; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
184; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i32 3
185; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
186; UNROLL-NO-IC:       vector.body:
187; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
188; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ]
189; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
190; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 4
191; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = add nuw nsw i64 [[TMP7]], 1
192; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = add nuw nsw i64 [[TMP8]], 1
193; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]]
194; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]]
195; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0
196; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
197; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP14]], align 4
198; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 4
199; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
200; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD7]] = load <4 x i32>, <4 x i32>* [[TMP16]], align 4
201; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
202; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
203; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP7]]
204; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP8]]
205; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP17]]
206; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = add <4 x i32> [[WIDE_LOAD7]], [[TMP18]]
207; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0
208; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
209; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP21]], <4 x i32>* [[TMP24]], align 4
210; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 4
211; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <4 x i32>*
212; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP22]], <4 x i32>* [[TMP26]], align 4
213; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
214; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
215; UNROLL-NO-IC-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
216; UNROLL-NO-IC:       middle.block:
217; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
218; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD7]], i32 3
219; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD7]], i32 2
220; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
221; UNROLL-NO-IC:       scalar.ph:
222; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
223; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
224; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
225; UNROLL-NO-IC:       scalar.body:
226; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[SCALAR_BODY]] ]
227; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
228; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
229; UNROLL-NO-IC-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
230; UNROLL-NO-IC-NEXT:    [[TMP28]] = load i32, i32* [[ARRAYIDX32]], align 4
231; UNROLL-NO-IC-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
232; UNROLL-NO-IC-NEXT:    [[ADD35:%.*]] = add i32 [[TMP28]], [[SCALAR_RECUR]]
233; UNROLL-NO-IC-NEXT:    store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
234; UNROLL-NO-IC-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
235; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
236; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
237; UNROLL-NO-IC:       for.exit:
238; UNROLL-NO-IC-NEXT:    ret void
239;
240; UNROLL-NO-VF-LABEL: @recurrence_1(
241; UNROLL-NO-VF-NEXT:  entry:
242; UNROLL-NO-VF-NEXT:    [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
243; UNROLL-NO-VF-NEXT:    br label [[FOR_PREHEADER:%.*]]
244; UNROLL-NO-VF:       for.preheader:
245; UNROLL-NO-VF-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
246; UNROLL-NO-VF-NEXT:    [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
247; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
248; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
249; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
250; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
251; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
252; UNROLL-NO-VF:       vector.memcheck:
253; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add i32 [[N]], -1
254; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
255; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
256; UNROLL-NO-VF-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP5]]
257; UNROLL-NO-VF-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
258; UNROLL-NO-VF-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[A]], i64 1
259; UNROLL-NO-VF-NEXT:    [[SCEVGEP34:%.*]] = bitcast i32* [[SCEVGEP3]] to i8*
260; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 2
261; UNROLL-NO-VF-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP6]]
262; UNROLL-NO-VF-NEXT:    [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8*
263; UNROLL-NO-VF-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
264; UNROLL-NO-VF-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
265; UNROLL-NO-VF-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
266; UNROLL-NO-VF-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
267; UNROLL-NO-VF:       vector.ph:
268; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
269; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
270; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
271; UNROLL-NO-VF:       vector.body:
272; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
273; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
274; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
275; UNROLL-NO-VF-NEXT:    [[INDUCTION7:%.*]] = add i64 [[INDEX]], 1
276; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = add nuw nsw i64 [[INDUCTION]], 1
277; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[INDUCTION7]], 1
278; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP7]]
279; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]]
280; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = load i32, i32* [[TMP9]], align 4
281; UNROLL-NO-VF-NEXT:    [[TMP12]] = load i32, i32* [[TMP10]], align 4
282; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]]
283; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION7]]
284; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = add i32 [[TMP11]], [[VECTOR_RECUR]]
285; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = add i32 [[TMP12]], [[TMP11]]
286; UNROLL-NO-VF-NEXT:    store i32 [[TMP15]], i32* [[TMP13]], align 4
287; UNROLL-NO-VF-NEXT:    store i32 [[TMP16]], i32* [[TMP14]], align 4
288; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
289; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
290; UNROLL-NO-VF-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
291; UNROLL-NO-VF:       middle.block:
292; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
293; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
294; UNROLL-NO-VF:       scalar.ph:
295; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
296; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
297; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
298; UNROLL-NO-VF:       scalar.body:
299; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ]
300; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
301; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
302; UNROLL-NO-VF-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
303; UNROLL-NO-VF-NEXT:    [[TMP18]] = load i32, i32* [[ARRAYIDX32]], align 4
304; UNROLL-NO-VF-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
305; UNROLL-NO-VF-NEXT:    [[ADD35:%.*]] = add i32 [[TMP18]], [[SCALAR_RECUR]]
306; UNROLL-NO-VF-NEXT:    store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
307; UNROLL-NO-VF-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
308; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
309; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
310; UNROLL-NO-VF:       for.exit:
311; UNROLL-NO-VF-NEXT:    ret void
312;
313; SINK-AFTER-LABEL: @recurrence_1(
314; SINK-AFTER-NEXT:  entry:
315; SINK-AFTER-NEXT:    [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
316; SINK-AFTER-NEXT:    br label [[FOR_PREHEADER:%.*]]
317; SINK-AFTER:       for.preheader:
318; SINK-AFTER-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
319; SINK-AFTER-NEXT:    [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
320; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
321; SINK-AFTER-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
322; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
323; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
324; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
325; SINK-AFTER:       vector.memcheck:
326; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i32 [[N]], -1
327; SINK-AFTER-NEXT:    [[TMP4:%.*]] = zext i32 [[TMP3]] to i64
328; SINK-AFTER-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP4]], 1
329; SINK-AFTER-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[TMP5]]
330; SINK-AFTER-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
331; SINK-AFTER-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i32, i32* [[A]], i64 1
332; SINK-AFTER-NEXT:    [[SCEVGEP34:%.*]] = bitcast i32* [[SCEVGEP3]] to i8*
333; SINK-AFTER-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 2
334; SINK-AFTER-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[A]], i64 [[TMP6]]
335; SINK-AFTER-NEXT:    [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8*
336; SINK-AFTER-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
337; SINK-AFTER-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
338; SINK-AFTER-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
339; SINK-AFTER-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
340; SINK-AFTER:       vector.ph:
341; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
342; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
343; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i32 3
344; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
345; SINK-AFTER:       vector.body:
346; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
347; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
348; SINK-AFTER-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 0
349; SINK-AFTER-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[TMP7]], 1
350; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]]
351; SINK-AFTER-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
352; SINK-AFTER-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
353; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP11]], align 4
354; SINK-AFTER-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
355; SINK-AFTER-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP7]]
356; SINK-AFTER-NEXT:    [[TMP14:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP12]]
357; SINK-AFTER-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 0
358; SINK-AFTER-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
359; SINK-AFTER-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP16]], align 4
360; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
361; SINK-AFTER-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
362; SINK-AFTER-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
363; SINK-AFTER:       middle.block:
364; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
365; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
366; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
367; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
368; SINK-AFTER:       scalar.ph:
369; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_MEMCHECK]] ], [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
370; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ], [ 0, [[VECTOR_MEMCHECK]] ]
371; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
372; SINK-AFTER:       scalar.body:
373; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ]
374; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
375; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
376; SINK-AFTER-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
377; SINK-AFTER-NEXT:    [[TMP18]] = load i32, i32* [[ARRAYIDX32]], align 4
378; SINK-AFTER-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
379; SINK-AFTER-NEXT:    [[ADD35:%.*]] = add i32 [[TMP18]], [[SCALAR_RECUR]]
380; SINK-AFTER-NEXT:    store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
381; SINK-AFTER-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
382; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
383; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
384; SINK-AFTER:       for.exit:
385; SINK-AFTER-NEXT:    ret void
386;
387entry:
  ; There is no guard on %n here: control always reaches the loop, so the loop
  ; body executes at least once.
388  br label %for.preheader
389
390for.preheader:
  ; Load a[0] once before the loop; it is the initial value of the recurrence
  ; phi %0 in scalar.body.
391  %arrayidx.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 0
392  %pre_load = load i32, i32* %arrayidx.phi.trans.insert
393  br label %scalar.body
394
395scalar.body:
  ; %0 is the recurrence: %pre_load on entry, and afterwards the value %1
  ; loaded by the previous iteration.
396  %0 = phi i32 [ %pre_load, %for.preheader ], [ %1, %scalar.body ]
  ; i64 induction variable i, counting up from 0 in steps of 1.
397  %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
398  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Load a[i + 1]; this value feeds %0 on the next iteration.
399  %arrayidx32 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
400  %1 = load i32, i32* %arrayidx32
  ; b[i] = a[i + 1] + (value loaded in the previous iteration, i.e. a[i]).
  ; NOTE(review): the pseudo-code comment before this function writes
  ; a[i] + a[i - 1]; the IR reads a[i + 1] and a[i] instead.
401  %arrayidx34 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
402  %add35 = add i32 %1, %0
403  store i32 %add35, i32* %arrayidx34
  ; Exit once the incremented induction variable, truncated to i32, equals %n.
404  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
405  %exitcond = icmp eq i32 %lftr.wideiv, %n
406  br i1 %exitcond, label %for.exit, label %scalar.body
407
408for.exit:
409  ret void
410}
411
412; int recurrence_2(int *a, int n) {
413;   int minmax;
414;   for (int i = 0; i < n; ++i)
415;     minmax = min(minmax, max(a[i] - a[i-1], 0));
416;   return minmax;
417; }
418;
419;
420;
; Test input: a single scalar loop that carries both a first-order recurrence
; (the element of %a loaded on the previous iteration) and a signed-min
; reduction. The reduction's start value is poison, mirroring the
; uninitialized `minmax` in the C sketch, and the function returns poison when
; %n <= 0 because the loop is then skipped entirely.
; The assertions that follow the define line are autogenerated (see the NOTE at
; the top of the file); regenerate them with the script instead of hand-editing.
421define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
422; CHECK-LABEL: @recurrence_2(
423; CHECK-NEXT:  entry:
424; CHECK-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
425; CHECK-NEXT:    br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
426; CHECK:       for.preheader:
427; CHECK-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
428; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
429; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
430; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
431; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
432; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
433; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
434; CHECK:       vector.ph:
435; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
436; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i64 3
437; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
438; CHECK:       vector.body:
439; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
440; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
441; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
442; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]]
443; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
444; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
445; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
446; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP5]]
447; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP6]], <4 x i32> zeroinitializer)
448; CHECK-NEXT:    [[TMP8]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP7]])
449; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
450; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
451; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
452; CHECK:       middle.block:
453; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP8]])
454; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
455; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3
456; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
457; CHECK:       scalar.ph:
458; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
459; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
460; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
461; CHECK-NEXT:    br label [[SCALAR_BODY:%.*]]
462; CHECK:       for.cond.cleanup.loopexit:
463; CHECK-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[TMP13:%.*]], [[SCALAR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
464; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
465; CHECK:       for.cond.cleanup:
466; CHECK-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
467; CHECK-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
468; CHECK:       scalar.body:
469; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP11:%.*]], [[SCALAR_BODY]] ]
470; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
471; CHECK-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP13]], [[SCALAR_BODY]] ]
472; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
473; CHECK-NEXT:    [[TMP11]] = load i32, i32* [[ARRAYIDX]], align 4
474; CHECK-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP11]], [[SCALAR_RECUR]]
475; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.smax.i32(i32 [[SUB3]], i32 0)
476; CHECK-NEXT:    [[TMP13]] = call i32 @llvm.smin.i32(i32 [[MINMAX_028]], i32 [[TMP12]])
477; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
478; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
479; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
480; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
481;
482; UNROLL-LABEL: @recurrence_2(
483; UNROLL-NEXT:  entry:
484; UNROLL-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
485; UNROLL-NEXT:    br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
486; UNROLL:       for.preheader:
487; UNROLL-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
488; UNROLL-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
489; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
490; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
491; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
492; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
493; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
494; UNROLL:       vector.ph:
495; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
496; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i64 3
497; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
498; UNROLL:       vector.body:
499; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
500; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD2:%.*]], [[VECTOR_BODY]] ]
501; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
502; UNROLL-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
503; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]]
504; UNROLL-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
505; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
506; UNROLL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 4
507; UNROLL-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
508; UNROLL-NEXT:    [[WIDE_LOAD2]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
509; UNROLL-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
510; UNROLL-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
511; UNROLL-NEXT:    [[TMP9:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP7]]
512; UNROLL-NEXT:    [[TMP10:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD2]], [[TMP8]]
513; UNROLL-NEXT:    [[TMP11:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP9]], <4 x i32> zeroinitializer)
514; UNROLL-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP10]], <4 x i32> zeroinitializer)
515; UNROLL-NEXT:    [[TMP13]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP11]])
516; UNROLL-NEXT:    [[TMP14]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP12]])
517; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
518; UNROLL-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
519; UNROLL-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
520; UNROLL:       middle.block:
521; UNROLL-NEXT:    [[TMP16:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP13]], <4 x i32> [[TMP14]])
522; UNROLL-NEXT:    [[TMP17:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP16]])
523; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
524; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 3
525; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
526; UNROLL:       scalar.ph:
527; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
528; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
529; UNROLL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
530; UNROLL-NEXT:    br label [[SCALAR_BODY:%.*]]
531; UNROLL:       for.cond.cleanup.loopexit:
532; UNROLL-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[TMP20:%.*]], [[SCALAR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
533; UNROLL-NEXT:    br label [[FOR_COND_CLEANUP]]
534; UNROLL:       for.cond.cleanup:
535; UNROLL-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
536; UNROLL-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
537; UNROLL:       scalar.body:
538; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ]
539; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
540; UNROLL-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP20]], [[SCALAR_BODY]] ]
541; UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
542; UNROLL-NEXT:    [[TMP18]] = load i32, i32* [[ARRAYIDX]], align 4
543; UNROLL-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP18]], [[SCALAR_RECUR]]
544; UNROLL-NEXT:    [[TMP19:%.*]] = call i32 @llvm.smax.i32(i32 [[SUB3]], i32 0)
545; UNROLL-NEXT:    [[TMP20]] = call i32 @llvm.smin.i32(i32 [[MINMAX_028]], i32 [[TMP19]])
546; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
547; UNROLL-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
548; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
549; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
550;
551; UNROLL-NO-IC-LABEL: @recurrence_2(
552; UNROLL-NO-IC-NEXT:  entry:
553; UNROLL-NO-IC-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
554; UNROLL-NO-IC-NEXT:    br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
555; UNROLL-NO-IC:       for.preheader:
556; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
557; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
558; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
559; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
560; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
561; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
562; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
563; UNROLL-NO-IC:       vector.ph:
564; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
565; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
566; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
567; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
568; UNROLL-NO-IC:       vector.body:
569; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
570; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD2:%.*]], [[VECTOR_BODY]] ]
571; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
572; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
573; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
574; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
575; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
576; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
577; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
578; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
579; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4
580; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 4
581; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
582; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD2]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
583; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
584; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
585; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP11]]
586; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD2]], [[TMP12]]
587; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = icmp sgt <4 x i32> [[TMP13]], zeroinitializer
588; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = icmp sgt <4 x i32> [[TMP14]], zeroinitializer
589; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP13]], <4 x i32> zeroinitializer
590; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = select <4 x i1> [[TMP16]], <4 x i32> [[TMP14]], <4 x i32> zeroinitializer
591; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP17]]
592; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = icmp slt <4 x i32> [[VEC_PHI1]], [[TMP18]]
593; UNROLL-NO-IC-NEXT:    [[TMP21]] = select <4 x i1> [[TMP19]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP17]]
594; UNROLL-NO-IC-NEXT:    [[TMP22]] = select <4 x i1> [[TMP20]], <4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP18]]
595; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
596; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
597; UNROLL-NO-IC-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
598; UNROLL-NO-IC:       middle.block:
599; UNROLL-NO-IC-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP21]], [[TMP22]]
600; UNROLL-NO-IC-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP21]], <4 x i32> [[TMP22]]
601; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX_SELECT]])
602; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
603; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 3
604; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 2
605; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
606; UNROLL-NO-IC:       scalar.ph:
607; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
608; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
609; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
610; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
611; UNROLL-NO-IC:       for.cond.cleanup.loopexit:
612; UNROLL-NO-IC-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
613; UNROLL-NO-IC-NEXT:    br label [[FOR_COND_CLEANUP]]
614; UNROLL-NO-IC:       for.cond.cleanup:
615; UNROLL-NO-IC-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
616; UNROLL-NO-IC-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
617; UNROLL-NO-IC:       scalar.body:
618; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP25:%.*]], [[SCALAR_BODY]] ]
619; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
620; UNROLL-NO-IC-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
621; UNROLL-NO-IC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
622; UNROLL-NO-IC-NEXT:    [[TMP25]] = load i32, i32* [[ARRAYIDX]], align 4
623; UNROLL-NO-IC-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP25]], [[SCALAR_RECUR]]
624; UNROLL-NO-IC-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
625; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
626; UNROLL-NO-IC-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
627; UNROLL-NO-IC-NEXT:    [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
628; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
629; UNROLL-NO-IC-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
630; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
631; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
632;
633; UNROLL-NO-VF-LABEL: @recurrence_2(
634; UNROLL-NO-VF-NEXT:  entry:
635; UNROLL-NO-VF-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
636; UNROLL-NO-VF-NEXT:    br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
637; UNROLL-NO-VF:       for.preheader:
638; UNROLL-NO-VF-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
639; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
640; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
641; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
642; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
643; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
644; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
645; UNROLL-NO-VF:       vector.ph:
646; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
647; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
648; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
649; UNROLL-NO-VF:       vector.body:
650; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
651; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
652; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
653; UNROLL-NO-VF-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
654; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
655; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
656; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDUCTION]]
657; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDUCTION1]]
658; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
659; UNROLL-NO-VF-NEXT:    [[TMP6]] = load i32, i32* [[TMP4]], align 4
660; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = sub nsw i32 [[TMP5]], [[VECTOR_RECUR]]
661; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sub nsw i32 [[TMP6]], [[TMP5]]
662; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 0
663; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], 0
664; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = select i1 [[TMP9]], i32 [[TMP7]], i32 0
665; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 0
666; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = icmp slt i32 [[VEC_PHI]], [[TMP11]]
667; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp slt i32 [[VEC_PHI2]], [[TMP12]]
668; UNROLL-NO-VF-NEXT:    [[TMP15]] = select i1 [[TMP13]], i32 [[VEC_PHI]], i32 [[TMP11]]
669; UNROLL-NO-VF-NEXT:    [[TMP16]] = select i1 [[TMP14]], i32 [[VEC_PHI2]], i32 [[TMP12]]
670; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
671; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
672; UNROLL-NO-VF-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
673; UNROLL-NO-VF:       middle.block:
674; UNROLL-NO-VF-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP15]], [[TMP16]]
675; UNROLL-NO-VF-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP15]], i32 [[TMP16]]
676; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
677; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
678; UNROLL-NO-VF:       scalar.ph:
679; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
680; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
681; UNROLL-NO-VF-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
682; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
683; UNROLL-NO-VF:       for.cond.cleanup.loopexit:
684; UNROLL-NO-VF-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
685; UNROLL-NO-VF-NEXT:    br label [[FOR_COND_CLEANUP]]
686; UNROLL-NO-VF:       for.cond.cleanup:
687; UNROLL-NO-VF-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
688; UNROLL-NO-VF-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
689; UNROLL-NO-VF:       scalar.body:
690; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ]
691; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
692; UNROLL-NO-VF-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
693; UNROLL-NO-VF-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
694; UNROLL-NO-VF-NEXT:    [[TMP18]] = load i32, i32* [[ARRAYIDX]], align 4
695; UNROLL-NO-VF-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP18]], [[SCALAR_RECUR]]
696; UNROLL-NO-VF-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
697; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
698; UNROLL-NO-VF-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
699; UNROLL-NO-VF-NEXT:    [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
700; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
701; UNROLL-NO-VF-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
702; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
703; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
704;
705; SINK-AFTER-LABEL: @recurrence_2(
706; SINK-AFTER-NEXT:  entry:
707; SINK-AFTER-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
708; SINK-AFTER-NEXT:    br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
709; SINK-AFTER:       for.preheader:
710; SINK-AFTER-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
711; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
712; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
713; SINK-AFTER-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
714; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
715; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
716; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
717; SINK-AFTER:       vector.ph:
718; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
719; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
720; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
721; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
722; SINK-AFTER:       vector.body:
723; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
724; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
725; SINK-AFTER-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
726; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
727; SINK-AFTER-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
728; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
729; SINK-AFTER-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
730; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
731; SINK-AFTER-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
732; SINK-AFTER-NEXT:    [[TMP8:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP7]]
733; SINK-AFTER-NEXT:    [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP8]], zeroinitializer
734; SINK-AFTER-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP8]], <4 x i32> zeroinitializer
735; SINK-AFTER-NEXT:    [[TMP11:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP10]]
736; SINK-AFTER-NEXT:    [[TMP12]] = select <4 x i1> [[TMP11]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP10]]
737; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
738; SINK-AFTER-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
739; SINK-AFTER-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
740; SINK-AFTER:       middle.block:
741; SINK-AFTER-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP12]])
742; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
743; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
744; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
745; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
746; SINK-AFTER:       scalar.ph:
747; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
748; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
749; SINK-AFTER-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
750; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
751; SINK-AFTER:       for.cond.cleanup.loopexit:
752; SINK-AFTER-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
753; SINK-AFTER-NEXT:    br label [[FOR_COND_CLEANUP]]
754; SINK-AFTER:       for.cond.cleanup:
755; SINK-AFTER-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
756; SINK-AFTER-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
757; SINK-AFTER:       scalar.body:
758; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[SCALAR_BODY]] ]
759; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
760; SINK-AFTER-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
761; SINK-AFTER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
762; SINK-AFTER-NEXT:    [[TMP15]] = load i32, i32* [[ARRAYIDX]], align 4
763; SINK-AFTER-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP15]], [[SCALAR_RECUR]]
764; SINK-AFTER-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
765; SINK-AFTER-NEXT:    [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
766; SINK-AFTER-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
767; SINK-AFTER-NEXT:    [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
768; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
769; SINK-AFTER-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
770; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
771; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
772;
; Guard block: the loop is entered only when %n > 0 (signed compare).
773entry:
774  %cmp27 = icmp sgt i32 %n, 0
775  br i1 %cmp27, label %for.preheader, label %for.cond.cleanup
776
; Preheader: load a[-1], which seeds the recurrence for the first iteration.
777for.preheader:
778  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 -1
779  %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4
780  br label %scalar.body
781
; Loop exit: single-incoming phi that carries the final reduction value out of
; the loop.
782for.cond.cleanup.loopexit:
783  %minmax.0.cond.lcssa = phi i32 [ %minmax.0.cond, %scalar.body ]
784  br label %for.cond.cleanup
785
; Common return: poison if the loop was skipped, otherwise the loop's result.
786for.cond.cleanup:
787  %minmax.0.lcssa = phi i32 [ poison, %entry ], [ %minmax.0.cond.lcssa, %for.cond.cleanup.loopexit ]
788  ret i32 %minmax.0.lcssa
789
; Loop body, one element of %a per iteration.
790scalar.body:
  ; %0 is the first-order recurrence: the value %1 loaded on the previous
  ; iteration (a[i-1]), or %.pre (a[-1]) on the first iteration.
791  %0 = phi i32 [ %.pre, %for.preheader ], [ %1, %scalar.body ]
792  %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
  ; Running minimum; its start value is poison.
793  %minmax.028 = phi i32 [ poison, %for.preheader ], [ %minmax.0.cond, %scalar.body ]
794  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
795  %1 = load i32, i32* %arrayidx, align 4
796  %sub3 = sub nsw i32 %1, %0
  ; %cond = max(a[i] - a[i-1], 0), written as icmp sgt + select.
797  %cmp4 = icmp sgt i32 %sub3, 0
798  %cond = select i1 %cmp4, i32 %sub3, i32 0
  ; %minmax.0.cond = min(%minmax.028, %cond), written as icmp slt + select.
799  %cmp5 = icmp slt i32 %minmax.028, %cond
800  %minmax.0.cond = select i1 %cmp5, i32 %minmax.028, i32 %cond
801  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Leave the loop once the next index, truncated to i32, equals %n.
802  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
803  %exitcond = icmp eq i32 %lftr.wideiv, %n
804  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %scalar.body
805}
806
807; void recurrence_3(short *a, double *b, int n, float f, short p) {
808;   b[0] = (double)a[0] - f * (double)p;
809;   for (int i = 1; i < n; i++)
810;     b[i] = (double)a[i] - f * (double)a[i - 1];
811; }
812;
813; Check also that the casts were not moved needlessly.
814;
815;
816define void @recurrence_3(i16* nocapture readonly %a, double* nocapture %b, i32 %n, float %f, i16 %p) {
817; CHECK-LABEL: @recurrence_3(
818; CHECK-NEXT:  entry:
819; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
820; CHECK-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
821; CHECK-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
822; CHECK-NEXT:    [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
823; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
824; CHECK-NEXT:    [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
825; CHECK-NEXT:    store double [[SUB]], double* [[B:%.*]], align 8
826; CHECK-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
827; CHECK-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
828; CHECK:       for.preheader:
829; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -2
830; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
831; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
832; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 3
833; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
834; CHECK:       vector.memcheck:
835; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1
836; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[N]], -2
837; CHECK-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
838; CHECK-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2
839; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]]
840; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1
841; CHECK-NEXT:    [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]]
842; CHECK-NEXT:    [[TMP7:%.*]] = bitcast i16* [[SCEVGEP6]] to double*
843; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult double* [[SCEVGEP]], [[TMP7]]
844; CHECK-NEXT:    [[TMP8:%.*]] = bitcast double* [[SCEVGEP2]] to i16*
845; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP4]], [[TMP8]]
846; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
847; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
848; CHECK:       vector.ph:
849; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934588
850; CHECK-NEXT:    [[IND_END:%.*]] = or i64 [[N_VEC]], 1
851; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 3
852; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
853; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
854; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
855; CHECK:       vector.body:
856; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
857; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
858; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1
859; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[OFFSET_IDX]]
860; CHECK-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
861; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2
862; CHECK-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
863; CHECK-NEXT:    [[TMP12:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
864; CHECK-NEXT:    [[TMP13:%.*]] = sitofp <4 x i16> [[TMP11]] to <4 x double>
865; CHECK-NEXT:    [[TMP14:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT]], [[TMP13]]
866; CHECK-NEXT:    [[TMP15:%.*]] = fsub fast <4 x double> [[TMP12]], [[TMP14]]
867; CHECK-NEXT:    [[TMP16:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[OFFSET_IDX]]
868; CHECK-NEXT:    [[TMP17:%.*]] = bitcast double* [[TMP16]] to <4 x double>*
869; CHECK-NEXT:    store <4 x double> [[TMP15]], <4 x double>* [[TMP17]], align 8
870; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
871; CHECK-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
872; CHECK-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
873; CHECK:       middle.block:
874; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
875; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
876; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
877; CHECK:       scalar.ph:
878; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
879; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[VECTOR_MEMCHECK]] ], [ 1, [[FOR_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
880; CHECK-NEXT:    br label [[SCALAR_BODY:%.*]]
881; CHECK:       scalar.body:
882; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[SCALAR_BODY]] ]
883; CHECK-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
884; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
885; CHECK-NEXT:    [[TMP19]] = load i16, i16* [[ARRAYIDX5]], align 2
886; CHECK-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP19]] to double
887; CHECK-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
888; CHECK-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
889; CHECK-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
890; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
891; CHECK-NEXT:    store double [[SUB13]], double* [[ARRAYIDX15]], align 8
892; CHECK-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
893; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
894; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
895; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
896; CHECK:       for.end.loopexit:
897; CHECK-NEXT:    br label [[FOR_END]]
898; CHECK:       for.end:
899; CHECK-NEXT:    ret void
900;
901; UNROLL-LABEL: @recurrence_3(
902; UNROLL-NEXT:  entry:
903; UNROLL-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
904; UNROLL-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
905; UNROLL-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
906; UNROLL-NEXT:    [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
907; UNROLL-NEXT:    [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
908; UNROLL-NEXT:    [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
909; UNROLL-NEXT:    store double [[SUB]], double* [[B:%.*]], align 8
910; UNROLL-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
911; UNROLL-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
912; UNROLL:       for.preheader:
913; UNROLL-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -2
914; UNROLL-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
915; UNROLL-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
916; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 7
917; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
918; UNROLL:       vector.memcheck:
919; UNROLL-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1
920; UNROLL-NEXT:    [[TMP4:%.*]] = add i32 [[N]], -2
921; UNROLL-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
922; UNROLL-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2
923; UNROLL-NEXT:    [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]]
924; UNROLL-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1
925; UNROLL-NEXT:    [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]]
926; UNROLL-NEXT:    [[TMP7:%.*]] = bitcast i16* [[SCEVGEP6]] to double*
927; UNROLL-NEXT:    [[BOUND0:%.*]] = icmp ult double* [[SCEVGEP]], [[TMP7]]
928; UNROLL-NEXT:    [[TMP8:%.*]] = bitcast double* [[SCEVGEP2]] to i16*
929; UNROLL-NEXT:    [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP4]], [[TMP8]]
930; UNROLL-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
931; UNROLL-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
932; UNROLL:       vector.ph:
933; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934584
934; UNROLL-NEXT:    [[IND_END:%.*]] = or i64 [[N_VEC]], 1
935; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 3
936; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
937; UNROLL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
938; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
939; UNROLL-NEXT:    [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT9]], <4 x double> poison, <4 x i32> zeroinitializer
940; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
941; UNROLL:       vector.body:
942; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
943; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD8:%.*]], [[VECTOR_BODY]] ]
944; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1
945; UNROLL-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[OFFSET_IDX]]
946; UNROLL-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
947; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2
948; UNROLL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i64 4
949; UNROLL-NEXT:    [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>*
950; UNROLL-NEXT:    [[WIDE_LOAD8]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2
951; UNROLL-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
952; UNROLL-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD8]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
953; UNROLL-NEXT:    [[TMP15:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
954; UNROLL-NEXT:    [[TMP16:%.*]] = sitofp <4 x i16> [[WIDE_LOAD8]] to <4 x double>
955; UNROLL-NEXT:    [[TMP17:%.*]] = sitofp <4 x i16> [[TMP13]] to <4 x double>
956; UNROLL-NEXT:    [[TMP18:%.*]] = sitofp <4 x i16> [[TMP14]] to <4 x double>
957; UNROLL-NEXT:    [[TMP19:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT]], [[TMP17]]
958; UNROLL-NEXT:    [[TMP20:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT10]], [[TMP18]]
959; UNROLL-NEXT:    [[TMP21:%.*]] = fsub fast <4 x double> [[TMP15]], [[TMP19]]
960; UNROLL-NEXT:    [[TMP22:%.*]] = fsub fast <4 x double> [[TMP16]], [[TMP20]]
961; UNROLL-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[OFFSET_IDX]]
962; UNROLL-NEXT:    [[TMP24:%.*]] = bitcast double* [[TMP23]] to <4 x double>*
963; UNROLL-NEXT:    store <4 x double> [[TMP21]], <4 x double>* [[TMP24]], align 8
964; UNROLL-NEXT:    [[TMP25:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 4
965; UNROLL-NEXT:    [[TMP26:%.*]] = bitcast double* [[TMP25]] to <4 x double>*
966; UNROLL-NEXT:    store <4 x double> [[TMP22]], <4 x double>* [[TMP26]], align 8
967; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
968; UNROLL-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
969; UNROLL-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
970; UNROLL:       middle.block:
971; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
972; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD8]], i64 3
973; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
974; UNROLL:       scalar.ph:
975; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
976; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[VECTOR_MEMCHECK]] ], [ 1, [[FOR_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
977; UNROLL-NEXT:    br label [[SCALAR_BODY:%.*]]
978; UNROLL:       scalar.body:
979; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[SCALAR_BODY]] ]
980; UNROLL-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
981; UNROLL-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
982; UNROLL-NEXT:    [[TMP28]] = load i16, i16* [[ARRAYIDX5]], align 2
983; UNROLL-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP28]] to double
984; UNROLL-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
985; UNROLL-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
986; UNROLL-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
987; UNROLL-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
988; UNROLL-NEXT:    store double [[SUB13]], double* [[ARRAYIDX15]], align 8
989; UNROLL-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
990; UNROLL-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
991; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
992; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
993; UNROLL:       for.end.loopexit:
994; UNROLL-NEXT:    br label [[FOR_END]]
995; UNROLL:       for.end:
996; UNROLL-NEXT:    ret void
997;
998; UNROLL-NO-IC-LABEL: @recurrence_3(
999; UNROLL-NO-IC-NEXT:  entry:
1000; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1001; UNROLL-NO-IC-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
1002; UNROLL-NO-IC-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
1003; UNROLL-NO-IC-NEXT:    [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
1004; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
1005; UNROLL-NO-IC-NEXT:    [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
1006; UNROLL-NO-IC-NEXT:    store double [[SUB]], double* [[B:%.*]], align 8
1007; UNROLL-NO-IC-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
1008; UNROLL-NO-IC-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
1009; UNROLL-NO-IC:       for.preheader:
1010; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -2
1011; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
1012; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1013; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
1014; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1015; UNROLL-NO-IC:       vector.memcheck:
1016; UNROLL-NO-IC-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1
1017; UNROLL-NO-IC-NEXT:    [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to i8*
1018; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i32 [[N]], -2
1019; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
1020; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2
1021; UNROLL-NO-IC-NEXT:    [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]]
1022; UNROLL-NO-IC-NEXT:    [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to i8*
1023; UNROLL-NO-IC-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1
1024; UNROLL-NO-IC-NEXT:    [[SCEVGEP45:%.*]] = bitcast i16* [[SCEVGEP4]] to i8*
1025; UNROLL-NO-IC-NEXT:    [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]]
1026; UNROLL-NO-IC-NEXT:    [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
1027; UNROLL-NO-IC-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]]
1028; UNROLL-NO-IC-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]]
1029; UNROLL-NO-IC-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1030; UNROLL-NO-IC-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1031; UNROLL-NO-IC:       vector.ph:
1032; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
1033; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
1034; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i64 1, [[N_VEC]]
1035; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
1036; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0
1037; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
1038; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT9:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0
1039; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT10:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT9]], <4 x double> poison, <4 x i32> zeroinitializer
1040; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
1041; UNROLL-NO-IC:       vector.body:
1042; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1043; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD8:%.*]], [[VECTOR_BODY]] ]
1044; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
1045; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
1046; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 4
1047; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP7]]
1048; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP8]]
1049; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i32 0
1050; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = bitcast i16* [[TMP11]] to <4 x i16>*
1051; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP12]], align 2
1052; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i16, i16* [[TMP9]], i32 4
1053; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = bitcast i16* [[TMP13]] to <4 x i16>*
1054; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD8]] = load <4 x i16>, <4 x i16>* [[TMP14]], align 2
1055; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1056; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD8]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1057; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
1058; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = sitofp <4 x i16> [[WIDE_LOAD8]] to <4 x double>
1059; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = sitofp <4 x i16> [[TMP15]] to <4 x double>
1060; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = sitofp <4 x i16> [[TMP16]] to <4 x double>
1061; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = fmul fast <4 x double> [[TMP19]], [[BROADCAST_SPLAT]]
1062; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = fmul fast <4 x double> [[TMP20]], [[BROADCAST_SPLAT10]]
1063; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = fsub fast <4 x double> [[TMP17]], [[TMP21]]
1064; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = fsub fast <4 x double> [[TMP18]], [[TMP22]]
1065; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP7]]
1066; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP8]]
1067; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 0
1068; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = bitcast double* [[TMP27]] to <4 x double>*
1069; UNROLL-NO-IC-NEXT:    store <4 x double> [[TMP23]], <4 x double>* [[TMP28]], align 8
1070; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 4
1071; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = bitcast double* [[TMP29]] to <4 x double>*
1072; UNROLL-NO-IC-NEXT:    store <4 x double> [[TMP24]], <4 x double>* [[TMP30]], align 8
1073; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1074; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1075; UNROLL-NO-IC-NEXT:    br i1 [[TMP31]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
1076; UNROLL-NO-IC:       middle.block:
1077; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1078; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD8]], i32 3
1079; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD8]], i32 2
1080; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
1081; UNROLL-NO-IC:       scalar.ph:
1082; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
1083; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ], [ 1, [[VECTOR_MEMCHECK]] ]
1084; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
1085; UNROLL-NO-IC:       scalar.body:
1086; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP32:%.*]], [[SCALAR_BODY]] ]
1087; UNROLL-NO-IC-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1088; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
1089; UNROLL-NO-IC-NEXT:    [[TMP32]] = load i16, i16* [[ARRAYIDX5]], align 2
1090; UNROLL-NO-IC-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP32]] to double
1091; UNROLL-NO-IC-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
1092; UNROLL-NO-IC-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
1093; UNROLL-NO-IC-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
1094; UNROLL-NO-IC-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
1095; UNROLL-NO-IC-NEXT:    store double [[SUB13]], double* [[ARRAYIDX15]], align 8
1096; UNROLL-NO-IC-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
1097; UNROLL-NO-IC-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
1098; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
1099; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
1100; UNROLL-NO-IC:       for.end.loopexit:
1101; UNROLL-NO-IC-NEXT:    br label [[FOR_END]]
1102; UNROLL-NO-IC:       for.end:
1103; UNROLL-NO-IC-NEXT:    ret void
1104;
1105; UNROLL-NO-VF-LABEL: @recurrence_3(
1106; UNROLL-NO-VF-NEXT:  entry:
1107; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1108; UNROLL-NO-VF-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
1109; UNROLL-NO-VF-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
1110; UNROLL-NO-VF-NEXT:    [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
1111; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
1112; UNROLL-NO-VF-NEXT:    [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
1113; UNROLL-NO-VF-NEXT:    store double [[SUB]], double* [[B:%.*]], align 8
1114; UNROLL-NO-VF-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
1115; UNROLL-NO-VF-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
1116; UNROLL-NO-VF:       for.preheader:
1117; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -2
1118; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
1119; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1120; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
1121; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1122; UNROLL-NO-VF:       vector.memcheck:
1123; UNROLL-NO-VF-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1
1124; UNROLL-NO-VF-NEXT:    [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to i8*
1125; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = add i32 [[N]], -2
1126; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
1127; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2
1128; UNROLL-NO-VF-NEXT:    [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]]
1129; UNROLL-NO-VF-NEXT:    [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to i8*
1130; UNROLL-NO-VF-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1
1131; UNROLL-NO-VF-NEXT:    [[SCEVGEP45:%.*]] = bitcast i16* [[SCEVGEP4]] to i8*
1132; UNROLL-NO-VF-NEXT:    [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]]
1133; UNROLL-NO-VF-NEXT:    [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
1134; UNROLL-NO-VF-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]]
1135; UNROLL-NO-VF-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]]
1136; UNROLL-NO-VF-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1137; UNROLL-NO-VF-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1138; UNROLL-NO-VF:       vector.ph:
1139; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
1140; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
1141; UNROLL-NO-VF-NEXT:    [[IND_END:%.*]] = add i64 1, [[N_VEC]]
1142; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
1143; UNROLL-NO-VF:       vector.body:
1144; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1145; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ]
1146; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
1147; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
1148; UNROLL-NO-VF-NEXT:    [[INDUCTION8:%.*]] = add i64 [[OFFSET_IDX]], 1
1149; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION]]
1150; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION8]]
1151; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = load i16, i16* [[TMP7]], align 2
1152; UNROLL-NO-VF-NEXT:    [[TMP10]] = load i16, i16* [[TMP8]], align 2
1153; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = sitofp i16 [[TMP9]] to double
1154; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = sitofp i16 [[TMP10]] to double
1155; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = sitofp i16 [[VECTOR_RECUR]] to double
1156; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = sitofp i16 [[TMP9]] to double
1157; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = fmul fast double [[TMP13]], [[CONV1]]
1158; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = fmul fast double [[TMP14]], [[CONV1]]
1159; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = fsub fast double [[TMP11]], [[TMP15]]
1160; UNROLL-NO-VF-NEXT:    [[TMP18:%.*]] = fsub fast double [[TMP12]], [[TMP16]]
1161; UNROLL-NO-VF-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDUCTION]]
1162; UNROLL-NO-VF-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDUCTION8]]
1163; UNROLL-NO-VF-NEXT:    store double [[TMP17]], double* [[TMP19]], align 8
1164; UNROLL-NO-VF-NEXT:    store double [[TMP18]], double* [[TMP20]], align 8
1165; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1166; UNROLL-NO-VF-NEXT:    [[TMP21:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1167; UNROLL-NO-VF-NEXT:    br i1 [[TMP21]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
1168; UNROLL-NO-VF:       middle.block:
1169; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1170; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
1171; UNROLL-NO-VF:       scalar.ph:
1172; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
1173; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ], [ 1, [[VECTOR_MEMCHECK]] ]
1174; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
1175; UNROLL-NO-VF:       scalar.body:
1176; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP22:%.*]], [[SCALAR_BODY]] ]
1177; UNROLL-NO-VF-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1178; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
1179; UNROLL-NO-VF-NEXT:    [[TMP22]] = load i16, i16* [[ARRAYIDX5]], align 2
1180; UNROLL-NO-VF-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP22]] to double
1181; UNROLL-NO-VF-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
1182; UNROLL-NO-VF-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
1183; UNROLL-NO-VF-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
1184; UNROLL-NO-VF-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
1185; UNROLL-NO-VF-NEXT:    store double [[SUB13]], double* [[ARRAYIDX15]], align 8
1186; UNROLL-NO-VF-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
1187; UNROLL-NO-VF-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
1188; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
1189; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
1190; UNROLL-NO-VF:       for.end.loopexit:
1191; UNROLL-NO-VF-NEXT:    br label [[FOR_END]]
1192; UNROLL-NO-VF:       for.end:
1193; UNROLL-NO-VF-NEXT:    ret void
1194;
1195; SINK-AFTER-LABEL: @recurrence_3(
1196; SINK-AFTER-NEXT:  entry:
1197; SINK-AFTER-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1198; SINK-AFTER-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
1199; SINK-AFTER-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
1200; SINK-AFTER-NEXT:    [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
1201; SINK-AFTER-NEXT:    [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
1202; SINK-AFTER-NEXT:    [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
1203; SINK-AFTER-NEXT:    store double [[SUB]], double* [[B:%.*]], align 8
1204; SINK-AFTER-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
1205; SINK-AFTER-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
1206; SINK-AFTER:       for.preheader:
1207; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -2
1208; SINK-AFTER-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
1209; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1210; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
1211; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
1212; SINK-AFTER:       vector.memcheck:
1213; SINK-AFTER-NEXT:    [[SCEVGEP:%.*]] = getelementptr double, double* [[B]], i64 1
1214; SINK-AFTER-NEXT:    [[SCEVGEP1:%.*]] = bitcast double* [[SCEVGEP]] to i8*
1215; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add i32 [[N]], -2
1216; SINK-AFTER-NEXT:    [[TMP5:%.*]] = zext i32 [[TMP4]] to i64
1217; SINK-AFTER-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP5]], 2
1218; SINK-AFTER-NEXT:    [[SCEVGEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP6]]
1219; SINK-AFTER-NEXT:    [[SCEVGEP23:%.*]] = bitcast double* [[SCEVGEP2]] to i8*
1220; SINK-AFTER-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 1
1221; SINK-AFTER-NEXT:    [[SCEVGEP45:%.*]] = bitcast i16* [[SCEVGEP4]] to i8*
1222; SINK-AFTER-NEXT:    [[SCEVGEP6:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP6]]
1223; SINK-AFTER-NEXT:    [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
1224; SINK-AFTER-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP67]]
1225; SINK-AFTER-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP45]], [[SCEVGEP23]]
1226; SINK-AFTER-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
1227; SINK-AFTER-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
1228; SINK-AFTER:       vector.ph:
1229; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
1230; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
1231; SINK-AFTER-NEXT:    [[IND_END:%.*]] = add i64 1, [[N_VEC]]
1232; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
1233; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0
1234; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
1235; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
1236; SINK-AFTER:       vector.body:
1237; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1238; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
1239; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
1240; SINK-AFTER-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 0
1241; SINK-AFTER-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP7]]
1242; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP8]], i32 0
1243; SINK-AFTER-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
1244; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2
1245; SINK-AFTER-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1246; SINK-AFTER-NEXT:    [[TMP12:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
1247; SINK-AFTER-NEXT:    [[TMP13:%.*]] = sitofp <4 x i16> [[TMP11]] to <4 x double>
1248; SINK-AFTER-NEXT:    [[TMP14:%.*]] = fmul fast <4 x double> [[TMP13]], [[BROADCAST_SPLAT]]
1249; SINK-AFTER-NEXT:    [[TMP15:%.*]] = fsub fast <4 x double> [[TMP12]], [[TMP14]]
1250; SINK-AFTER-NEXT:    [[TMP16:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP7]]
1251; SINK-AFTER-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, double* [[TMP16]], i32 0
1252; SINK-AFTER-NEXT:    [[TMP18:%.*]] = bitcast double* [[TMP17]] to <4 x double>*
1253; SINK-AFTER-NEXT:    store <4 x double> [[TMP15]], <4 x double>* [[TMP18]], align 8
1254; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1255; SINK-AFTER-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1256; SINK-AFTER-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
1257; SINK-AFTER:       middle.block:
1258; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1259; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
1260; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
1261; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
1262; SINK-AFTER:       scalar.ph:
1263; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_MEMCHECK]] ], [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
1264; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ], [ 1, [[VECTOR_MEMCHECK]] ]
1265; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
1266; SINK-AFTER:       scalar.body:
1267; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[SCALAR_BODY]] ]
1268; SINK-AFTER-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1269; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
1270; SINK-AFTER-NEXT:    [[TMP20]] = load i16, i16* [[ARRAYIDX5]], align 2
1271; SINK-AFTER-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP20]] to double
1272; SINK-AFTER-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
1273; SINK-AFTER-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
1274; SINK-AFTER-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
1275; SINK-AFTER-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
1276; SINK-AFTER-NEXT:    store double [[SUB13]], double* [[ARRAYIDX15]], align 8
1277; SINK-AFTER-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
1278; SINK-AFTER-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
1279; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
1280; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
1281; SINK-AFTER:       for.end.loopexit:
1282; SINK-AFTER-NEXT:    br label [[FOR_END]]
1283; SINK-AFTER:       for.end:
1284; SINK-AFTER-NEXT:    ret void
1285;
1286entry:
1287  %0 = load i16, i16* %a, align 2
1288  %conv = sitofp i16 %0 to double
1289  %conv1 = fpext float %f to double
1290  %conv2 = sitofp i16 %p to double
1291  %mul = fmul fast double %conv2, %conv1
1292  %sub = fsub fast double %conv, %mul
1293  store double %sub, double* %b, align 8
1294  %cmp25 = icmp sgt i32 %n, 1
1295  br i1 %cmp25, label %for.preheader, label %for.end
1296
1297for.preheader:
1298  br label %scalar.body
1299
1300scalar.body:
1301  %1 = phi i16 [ %0, %for.preheader ], [ %2, %scalar.body ]
1302  %advars.iv = phi i64 [ %advars.iv.next, %scalar.body ], [ 1, %for.preheader ]
1303  %arrayidx5 = getelementptr inbounds i16, i16* %a, i64 %advars.iv
1304  %2 = load i16, i16* %arrayidx5, align 2
1305  %conv6 = sitofp i16 %2 to double
1306  %conv11 = sitofp i16 %1 to double
1307  %mul12 = fmul fast double %conv11, %conv1
1308  %sub13 = fsub fast double %conv6, %mul12
1309  %arrayidx15 = getelementptr inbounds double, double* %b, i64 %advars.iv
1310  store double %sub13, double* %arrayidx15, align 8
1311  %advars.iv.next = add nuw nsw i64 %advars.iv, 1
1312  %lftr.wideiv = trunc i64 %advars.iv.next to i32
1313  %exitcond = icmp eq i32 %lftr.wideiv, %n
1314  br i1 %exitcond, label %for.end.loopexit, label %scalar.body
1315
1316for.end.loopexit:
1317  br label %for.end
1318
1319for.end:
1320  ret void
1321}
1322
1323; void PR26734(short *a, int *b, int *c, int d, short *e) {
1324;   for (; d != 21; d++) {
1325;     *b &= *c;
1326;     *e = *a - 6;
1327;     *c = *e;
1328;   }
1329; }
1330;
1331;
1332define void @PR26734(i16* %a, i32* %b, i32* %c, i32 %d, i16* %e) {
1333; CHECK-LABEL: @PR26734(
1334; CHECK-NEXT:  entry:
1335; CHECK-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
1336; CHECK-NEXT:    br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
1337; CHECK:       entry.for.end_crit_edge:
1338; CHECK-NEXT:    br label [[FOR_END:%.*]]
1339; CHECK:       for.body.lr.ph:
1340; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1341; CHECK-NEXT:    [[SUB:%.*]] = add i16 [[TMP0]], -6
1342; CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[SUB]] to i32
1343; CHECK-NEXT:    [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
1344; CHECK-NEXT:    [[B_PROMOTED:%.*]] = load i32, i32* [[B:%.*]], align 4
1345; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1346; CHECK:       for.body:
1347; CHECK-NEXT:    [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
1348; CHECK-NEXT:    [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
1349; CHECK-NEXT:    [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
1350; CHECK-NEXT:    [[AND]] = and i32 [[AND6]], [[CONV25]]
1351; CHECK-NEXT:    [[INC]] = add nsw i32 [[INC7]], 1
1352; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC]], 21
1353; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
1354; CHECK:       for.cond.for.end_crit_edge:
1355; CHECK-NEXT:    store i32 [[CONV2]], i32* [[C]], align 4
1356; CHECK-NEXT:    store i32 [[AND]], i32* [[B]], align 4
1357; CHECK-NEXT:    store i16 [[SUB]], i16* [[E:%.*]], align 2
1358; CHECK-NEXT:    br label [[FOR_END]]
1359; CHECK:       for.end:
1360; CHECK-NEXT:    ret void
1361;
1362; UNROLL-LABEL: @PR26734(
1363; UNROLL-NEXT:  entry:
1364; UNROLL-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
1365; UNROLL-NEXT:    br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
1366; UNROLL:       entry.for.end_crit_edge:
1367; UNROLL-NEXT:    br label [[FOR_END:%.*]]
1368; UNROLL:       for.body.lr.ph:
1369; UNROLL-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1370; UNROLL-NEXT:    [[SUB:%.*]] = add i16 [[TMP0]], -6
1371; UNROLL-NEXT:    [[CONV2:%.*]] = sext i16 [[SUB]] to i32
1372; UNROLL-NEXT:    [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
1373; UNROLL-NEXT:    [[B_PROMOTED:%.*]] = load i32, i32* [[B:%.*]], align 4
1374; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
1375; UNROLL:       for.body:
1376; UNROLL-NEXT:    [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
1377; UNROLL-NEXT:    [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
1378; UNROLL-NEXT:    [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
1379; UNROLL-NEXT:    [[AND]] = and i32 [[AND6]], [[CONV25]]
1380; UNROLL-NEXT:    [[INC]] = add nsw i32 [[INC7]], 1
1381; UNROLL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC]], 21
1382; UNROLL-NEXT:    br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
1383; UNROLL:       for.cond.for.end_crit_edge:
1384; UNROLL-NEXT:    store i32 [[CONV2]], i32* [[C]], align 4
1385; UNROLL-NEXT:    store i32 [[AND]], i32* [[B]], align 4
1386; UNROLL-NEXT:    store i16 [[SUB]], i16* [[E:%.*]], align 2
1387; UNROLL-NEXT:    br label [[FOR_END]]
1388; UNROLL:       for.end:
1389; UNROLL-NEXT:    ret void
1390;
1391; UNROLL-NO-IC-LABEL: @PR26734(
1392; UNROLL-NO-IC-NEXT:  entry:
1393; UNROLL-NO-IC-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
1394; UNROLL-NO-IC-NEXT:    br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
1395; UNROLL-NO-IC:       entry.for.end_crit_edge:
1396; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[B:%.*]], align 4
1397; UNROLL-NO-IC-NEXT:    br label [[FOR_END:%.*]]
1398; UNROLL-NO-IC:       for.body.lr.ph:
1399; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1400; UNROLL-NO-IC-NEXT:    [[SUB:%.*]] = add i16 [[TMP0]], -6
1401; UNROLL-NO-IC-NEXT:    [[CONV2:%.*]] = sext i16 [[SUB]] to i32
1402; UNROLL-NO-IC-NEXT:    [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
1403; UNROLL-NO-IC-NEXT:    [[B_PROMOTED:%.*]] = load i32, i32* [[B]], align 4
1404; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
1405; UNROLL-NO-IC:       for.body:
1406; UNROLL-NO-IC-NEXT:    [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
1407; UNROLL-NO-IC-NEXT:    [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
1408; UNROLL-NO-IC-NEXT:    [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
1409; UNROLL-NO-IC-NEXT:    [[AND]] = and i32 [[AND6]], [[CONV25]]
1410; UNROLL-NO-IC-NEXT:    [[INC]] = add nsw i32 [[INC7]], 1
1411; UNROLL-NO-IC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC]], 21
1412; UNROLL-NO-IC-NEXT:    br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
1413; UNROLL-NO-IC:       for.cond.for.end_crit_edge:
1414; UNROLL-NO-IC-NEXT:    [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
1415; UNROLL-NO-IC-NEXT:    store i32 [[CONV2]], i32* [[C]], align 4
1416; UNROLL-NO-IC-NEXT:    store i32 [[AND_LCSSA]], i32* [[B]], align 4
1417; UNROLL-NO-IC-NEXT:    store i16 [[SUB]], i16* [[E:%.*]], align 2
1418; UNROLL-NO-IC-NEXT:    br label [[FOR_END]]
1419; UNROLL-NO-IC:       for.end:
1420; UNROLL-NO-IC-NEXT:    ret void
1421;
1422; UNROLL-NO-VF-LABEL: @PR26734(
1423; UNROLL-NO-VF-NEXT:  entry:
1424; UNROLL-NO-VF-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
1425; UNROLL-NO-VF-NEXT:    br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
1426; UNROLL-NO-VF:       entry.for.end_crit_edge:
1427; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[B:%.*]], align 4
1428; UNROLL-NO-VF-NEXT:    br label [[FOR_END:%.*]]
1429; UNROLL-NO-VF:       for.body.lr.ph:
1430; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1431; UNROLL-NO-VF-NEXT:    [[SUB:%.*]] = add i16 [[TMP0]], -6
1432; UNROLL-NO-VF-NEXT:    [[CONV2:%.*]] = sext i16 [[SUB]] to i32
1433; UNROLL-NO-VF-NEXT:    [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
1434; UNROLL-NO-VF-NEXT:    [[B_PROMOTED:%.*]] = load i32, i32* [[B]], align 4
1435; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
1436; UNROLL-NO-VF:       for.body:
1437; UNROLL-NO-VF-NEXT:    [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
1438; UNROLL-NO-VF-NEXT:    [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
1439; UNROLL-NO-VF-NEXT:    [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
1440; UNROLL-NO-VF-NEXT:    [[AND]] = and i32 [[AND6]], [[CONV25]]
1441; UNROLL-NO-VF-NEXT:    [[INC]] = add nsw i32 [[INC7]], 1
1442; UNROLL-NO-VF-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC]], 21
1443; UNROLL-NO-VF-NEXT:    br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
1444; UNROLL-NO-VF:       for.cond.for.end_crit_edge:
1445; UNROLL-NO-VF-NEXT:    [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
1446; UNROLL-NO-VF-NEXT:    store i32 [[CONV2]], i32* [[C]], align 4
1447; UNROLL-NO-VF-NEXT:    store i32 [[AND_LCSSA]], i32* [[B]], align 4
1448; UNROLL-NO-VF-NEXT:    store i16 [[SUB]], i16* [[E:%.*]], align 2
1449; UNROLL-NO-VF-NEXT:    br label [[FOR_END]]
1450; UNROLL-NO-VF:       for.end:
1451; UNROLL-NO-VF-NEXT:    ret void
1452;
1453; SINK-AFTER-LABEL: @PR26734(
1454; SINK-AFTER-NEXT:  entry:
1455; SINK-AFTER-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
1456; SINK-AFTER-NEXT:    br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
1457; SINK-AFTER:       entry.for.end_crit_edge:
1458; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[B:%.*]], align 4
1459; SINK-AFTER-NEXT:    br label [[FOR_END:%.*]]
1460; SINK-AFTER:       for.body.lr.ph:
1461; SINK-AFTER-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1462; SINK-AFTER-NEXT:    [[SUB:%.*]] = add i16 [[TMP0]], -6
1463; SINK-AFTER-NEXT:    [[CONV2:%.*]] = sext i16 [[SUB]] to i32
1464; SINK-AFTER-NEXT:    [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
1465; SINK-AFTER-NEXT:    [[B_PROMOTED:%.*]] = load i32, i32* [[B]], align 4
1466; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
1467; SINK-AFTER:       for.body:
1468; SINK-AFTER-NEXT:    [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
1469; SINK-AFTER-NEXT:    [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
1470; SINK-AFTER-NEXT:    [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
1471; SINK-AFTER-NEXT:    [[AND]] = and i32 [[AND6]], [[CONV25]]
1472; SINK-AFTER-NEXT:    [[INC]] = add nsw i32 [[INC7]], 1
1473; SINK-AFTER-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC]], 21
1474; SINK-AFTER-NEXT:    br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
1475; SINK-AFTER:       for.cond.for.end_crit_edge:
1476; SINK-AFTER-NEXT:    [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
1477; SINK-AFTER-NEXT:    store i32 [[CONV2]], i32* [[C]], align 4
1478; SINK-AFTER-NEXT:    store i32 [[AND_LCSSA]], i32* [[B]], align 4
1479; SINK-AFTER-NEXT:    store i16 [[SUB]], i16* [[E:%.*]], align 2
1480; SINK-AFTER-NEXT:    br label [[FOR_END]]
1481; SINK-AFTER:       for.end:
1482; SINK-AFTER-NEXT:    ret void
1483;
; Input IR for this test. The expected output above contains no vector.body
; block for any of the opt invocations in the file header, i.e. this loop is
; expected to be left in scalar form.
;
; The loop is skipped entirely when %d already equals 21.
1484entry:
1485  %cmp4 = icmp eq i32 %d, 21
1486  br i1 %cmp4, label %entry.for.end_crit_edge, label %for.body.lr.ph
1487
; %.pre has no users in this function. The expected output of the two
; invocations that also run -dce and -instcombine omits this load; the other
; three retain it.
1488entry.for.end_crit_edge:
1489  %.pre = load i32, i32* %b, align 4
1490  br label %for.end
1491
; Values computed once before the loop: *a - 6 (as i16 in %sub and
; sign-extended to i32 in %conv2) and the initial contents of *c and *b.
1492for.body.lr.ph:
1493  %0 = load i16, i16* %a, align 2
1494  %sub = add i16 %0, -6
1495  %conv2 = sext i16 %sub to i32
1496  %c.promoted = load i32, i32* %c, align 4
1497  %b.promoted = load i32, i32* %b, align 4
1498  br label %for.body
1499
; %inc7 is the counter (starts at %d, exits once the incremented value is 21).
; %and6 carries the running AND, seeded with the initial *b.
; %conv25 is the initial *c on the first iteration and %conv2, which is
; defined outside the loop, on every later iteration.
1500for.body:
1501  %inc7 = phi i32 [ %d, %for.body.lr.ph ], [ %inc, %for.body ]
1502  %and6 = phi i32 [ %b.promoted, %for.body.lr.ph ], [ %and, %for.body ]
1503  %conv25 = phi i32 [ %c.promoted, %for.body.lr.ph ], [ %conv2, %for.body ]
1504  %and = and i32 %and6, %conv25
1505  %inc = add nsw i32 %inc7, 1
1506  %cmp = icmp eq i32 %inc, 21
1507  br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body
1508
; Loop exit: write the final values back through c, b and e.
1509for.cond.for.end_crit_edge:
1510  %and.lcssa = phi i32 [ %and, %for.body ]
1511  store i32 %conv2, i32* %c, align 4
1512  store i32 %and.lcssa, i32* %b, align 4
1513  store i16 %sub, i16* %e, align 2
1514  br label %for.end
1515
1516for.end:
1517  ret void
1518}
1519
1520; int PR27246() {
1521;   unsigned int e, n;
1522;   for (int i = 1; i < 49; ++i) {
1523;     for (int k = i; k > 1; --k)
1524;       e = k;
1525;     n = e;
1526;   }
1527;   return n;
1528; }
1529;
1530;
1531define i32 @PR27246() {
1532; CHECK-LABEL: @PR27246(
1533; CHECK-NEXT:  entry:
1534; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
1535; CHECK:       for.cond1.preheader:
1536; CHECK-NEXT:    [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
1537; CHECK-NEXT:    [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1:%.*]], [[FOR_COND_CLEANUP3]] ]
1538; CHECK-NEXT:    br label [[FOR_COND1:%.*]]
1539; CHECK:       for.cond.cleanup:
1540; CHECK-NEXT:    ret i32 [[E_1]]
1541; CHECK:       for.cond1:
1542; CHECK-NEXT:    [[E_1]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
1543; CHECK-NEXT:    [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
1544; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
1545; CHECK-NEXT:    [[DEC]] = add nsw i32 [[K_0]], -1
1546; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
1547; CHECK:       for.cond.cleanup3:
1548; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_016]], 1
1549; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
1550; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
1551;
1552; UNROLL-LABEL: @PR27246(
1553; UNROLL-NEXT:  entry:
1554; UNROLL-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
1555; UNROLL:       for.cond1.preheader:
1556; UNROLL-NEXT:    [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
1557; UNROLL-NEXT:    [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1:%.*]], [[FOR_COND_CLEANUP3]] ]
1558; UNROLL-NEXT:    br label [[FOR_COND1:%.*]]
1559; UNROLL:       for.cond.cleanup:
1560; UNROLL-NEXT:    ret i32 [[E_1]]
1561; UNROLL:       for.cond1:
1562; UNROLL-NEXT:    [[E_1]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
1563; UNROLL-NEXT:    [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
1564; UNROLL-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
1565; UNROLL-NEXT:    [[DEC]] = add nsw i32 [[K_0]], -1
1566; UNROLL-NEXT:    br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
1567; UNROLL:       for.cond.cleanup3:
1568; UNROLL-NEXT:    [[INC]] = add nuw nsw i32 [[I_016]], 1
1569; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
1570; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
1571;
1572; UNROLL-NO-IC-LABEL: @PR27246(
1573; UNROLL-NO-IC-NEXT:  entry:
1574; UNROLL-NO-IC-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
1575; UNROLL-NO-IC:       for.cond1.preheader:
1576; UNROLL-NO-IC-NEXT:    [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
1577; UNROLL-NO-IC-NEXT:    [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
1578; UNROLL-NO-IC-NEXT:    br label [[FOR_COND1:%.*]]
1579; UNROLL-NO-IC:       for.cond.cleanup:
1580; UNROLL-NO-IC-NEXT:    [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
1581; UNROLL-NO-IC-NEXT:    ret i32 [[E_1_LCSSA_LCSSA]]
1582; UNROLL-NO-IC:       for.cond1:
1583; UNROLL-NO-IC-NEXT:    [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
1584; UNROLL-NO-IC-NEXT:    [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
1585; UNROLL-NO-IC-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
1586; UNROLL-NO-IC-NEXT:    [[DEC]] = add nsw i32 [[K_0]], -1
1587; UNROLL-NO-IC-NEXT:    br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
1588; UNROLL-NO-IC:       for.cond.cleanup3:
1589; UNROLL-NO-IC-NEXT:    [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ]
1590; UNROLL-NO-IC-NEXT:    [[INC]] = add nuw nsw i32 [[I_016]], 1
1591; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
1592; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
1593;
1594; UNROLL-NO-VF-LABEL: @PR27246(
1595; UNROLL-NO-VF-NEXT:  entry:
1596; UNROLL-NO-VF-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
1597; UNROLL-NO-VF:       for.cond1.preheader:
1598; UNROLL-NO-VF-NEXT:    [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
1599; UNROLL-NO-VF-NEXT:    [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
1600; UNROLL-NO-VF-NEXT:    br label [[FOR_COND1:%.*]]
1601; UNROLL-NO-VF:       for.cond.cleanup:
1602; UNROLL-NO-VF-NEXT:    [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
1603; UNROLL-NO-VF-NEXT:    ret i32 [[E_1_LCSSA_LCSSA]]
1604; UNROLL-NO-VF:       for.cond1:
1605; UNROLL-NO-VF-NEXT:    [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
1606; UNROLL-NO-VF-NEXT:    [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
1607; UNROLL-NO-VF-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
1608; UNROLL-NO-VF-NEXT:    [[DEC]] = add nsw i32 [[K_0]], -1
1609; UNROLL-NO-VF-NEXT:    br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
1610; UNROLL-NO-VF:       for.cond.cleanup3:
1611; UNROLL-NO-VF-NEXT:    [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ]
1612; UNROLL-NO-VF-NEXT:    [[INC]] = add nuw nsw i32 [[I_016]], 1
1613; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
1614; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
1615;
1616; SINK-AFTER-LABEL: @PR27246(
1617; SINK-AFTER-NEXT:  entry:
1618; SINK-AFTER-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
1619; SINK-AFTER:       for.cond1.preheader:
1620; SINK-AFTER-NEXT:    [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
1621; SINK-AFTER-NEXT:    [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
1622; SINK-AFTER-NEXT:    br label [[FOR_COND1:%.*]]
1623; SINK-AFTER:       for.cond.cleanup:
1624; SINK-AFTER-NEXT:    [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
1625; SINK-AFTER-NEXT:    ret i32 [[E_1_LCSSA_LCSSA]]
1626; SINK-AFTER:       for.cond1:
1627; SINK-AFTER-NEXT:    [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
1628; SINK-AFTER-NEXT:    [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
1629; SINK-AFTER-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
1630; SINK-AFTER-NEXT:    [[DEC]] = add nsw i32 [[K_0]], -1
1631; SINK-AFTER-NEXT:    br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
1632; SINK-AFTER:       for.cond.cleanup3:
1633; SINK-AFTER-NEXT:    [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ]
1634; SINK-AFTER-NEXT:    [[INC]] = add nuw nsw i32 [[I_016]], 1
1635; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
1636; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
1637;
; Input IR for this test: a two-level loop nest. The expected output above
; contains no vector.body block for any of the opt invocations in the file
; header, i.e. the nest is expected to be left in scalar form.
1638entry:
1639  br label %for.cond1.preheader
1640
; Outer-loop header. %i.016 counts up from 1. %e.015 is poison on entry and
; afterwards carries the value the inner loop produced on the previous outer
; iteration.
1641for.cond1.preheader:
1642  %i.016 = phi i32 [ 1, %entry ], [ %inc, %for.cond.cleanup3 ]
1643  %e.015 = phi i32 [ poison, %entry ], [ %e.1.lcssa, %for.cond.cleanup3 ]
1644  br label %for.cond1
1645
; Function exit: returns the last value produced by the inner loop.
1646for.cond.cleanup:
1647  %e.1.lcssa.lcssa = phi i32 [ %e.1.lcssa, %for.cond.cleanup3 ]
1648  ret i32 %e.1.lcssa.lcssa
1649
; Inner loop. %k.0 counts down from %i.016 while it is greater than 1.
; %e.1 receives, along the back edge, the value the sibling phi %k.0 held on
; the previous inner iteration; on entry from the outer header it is %e.015.
1650for.cond1:
1651  %e.1 = phi i32 [ %k.0, %for.cond1 ], [ %e.015, %for.cond1.preheader ]
1652  %k.0 = phi i32 [ %dec, %for.cond1 ], [ %i.016, %for.cond1.preheader ]
1653  %cmp2 = icmp sgt i32 %k.0, 1
1654  %dec = add nsw i32 %k.0, -1
1655  br i1 %cmp2, label %for.cond1, label %for.cond.cleanup3
1656
; Outer-loop latch: leaves the nest once the incremented counter reaches 49.
1657for.cond.cleanup3:
1658  %e.1.lcssa = phi i32 [ %e.1, %for.cond1 ]
1659  %inc = add nuw nsw i32 %i.016, 1
1660  %exitcond = icmp eq i32 %inc, 49
1661  br i1 %exitcond, label %for.cond.cleanup, label %for.cond1.preheader
1662}
1663
1664;
1665define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) {
1666; CHECK-LABEL: @PR30183(
1667; CHECK-NEXT:  entry:
1668; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -2
1669; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
1670; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
1671; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6
1672; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1673; CHECK:       vector.ph:
1674; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], -4
1675; CHECK-NEXT:    [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
1676; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1677; CHECK:       vector.body:
1678; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1679; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1680; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1681; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
1682; CHECK:       middle.block:
1683; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1684; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1685; CHECK:       scalar.ph:
1686; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1687; CHECK-NEXT:    br label [[SCALAR_BODY:%.*]]
1688; CHECK:       scalar.body:
1689; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1690; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
1691; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
1692; CHECK-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
1693; CHECK:       for.end:
1694; CHECK-NEXT:    ret void
1695;
1696; UNROLL-LABEL: @PR30183(
1697; UNROLL-NEXT:  entry:
1698; UNROLL-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -2
1699; UNROLL-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
1700; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
1701; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 14
1702; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1703; UNROLL:       vector.ph:
1704; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], -8
1705; UNROLL-NEXT:    [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
1706; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
1707; UNROLL:       vector.body:
1708; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1709; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1710; UNROLL-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1711; UNROLL-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
1712; UNROLL:       middle.block:
1713; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1714; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1715; UNROLL:       scalar.ph:
1716; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1717; UNROLL-NEXT:    br label [[SCALAR_BODY:%.*]]
1718; UNROLL:       scalar.body:
1719; UNROLL-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1720; UNROLL-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
1721; UNROLL-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
1722; UNROLL-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
1723; UNROLL:       for.end:
1724; UNROLL-NEXT:    ret void
1725;
1726; UNROLL-NO-IC-LABEL: @PR30183(
1727; UNROLL-NO-IC-NEXT:  entry:
1728; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -2
1729; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
1730; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
1731; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
1732; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1733; UNROLL-NO-IC:       vector.ph:
1734; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
1735; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
1736; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
1737; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD:%.*]], i32 3
1738; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
1739; UNROLL-NO-IC:       vector.body:
1740; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1741; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[VECTOR_BODY]] ]
1742; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
1743; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
1744; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2
1745; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
1746; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6
1747; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 8
1748; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 10
1749; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 12
1750; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 14
1751; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = add nuw nsw i64 [[TMP3]], 2
1752; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = add nuw nsw i64 [[TMP4]], 2
1753; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = add nuw nsw i64 [[TMP5]], 2
1754; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = add nuw nsw i64 [[TMP6]], 2
1755; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = add nuw nsw i64 [[TMP7]], 2
1756; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = add nuw nsw i64 [[TMP8]], 2
1757; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = add nuw nsw i64 [[TMP9]], 2
1758; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = add nuw nsw i64 [[TMP10]], 2
1759; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP11]]
1760; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP12]]
1761; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
1762; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP14]]
1763; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP15]]
1764; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
1765; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]]
1766; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
1767; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = load i32, i32* [[TMP19]], align 4
1768; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = load i32, i32* [[TMP20]], align 4
1769; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = load i32, i32* [[TMP21]], align 4
1770; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP22]], align 4
1771; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP27]], i32 0
1772; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP28]], i32 1
1773; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 2
1774; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP30]], i32 3
1775; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = load i32, i32* [[TMP23]], align 4
1776; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP24]], align 4
1777; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = load i32, i32* [[TMP25]], align 4
1778; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = load i32, i32* [[TMP26]], align 4
1779; UNROLL-NO-IC-NEXT:    [[TMP39:%.*]] = insertelement <4 x i32> poison, i32 [[TMP35]], i32 0
1780; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP36]], i32 1
1781; UNROLL-NO-IC-NEXT:    [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP37]], i32 2
1782; UNROLL-NO-IC-NEXT:    [[TMP42]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP38]], i32 3
1783; UNROLL-NO-IC-NEXT:    [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP34]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1784; UNROLL-NO-IC-NEXT:    [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP34]], <4 x i32> [[TMP42]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1785; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1786; UNROLL-NO-IC-NEXT:    [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1787; UNROLL-NO-IC-NEXT:    br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
1788; UNROLL-NO-IC:       middle.block:
1789; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1790; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP42]], i32 3
1791; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP42]], i32 2
1792; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1793; UNROLL-NO-IC:       scalar.ph:
1794; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
1795; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1796; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
1797; UNROLL-NO-IC:       scalar.body:
1798; UNROLL-NO-IC-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1799; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
1800; UNROLL-NO-IC-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
1801; UNROLL-NO-IC-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]]
1802; UNROLL-NO-IC-NEXT:    [[VAR2]] = load i32, i32* [[VAR1]], align 4
1803; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
1804; UNROLL-NO-IC-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
1805; UNROLL-NO-IC:       for.end:
1806; UNROLL-NO-IC-NEXT:    ret void
1807;
1808; UNROLL-NO-VF-LABEL: @PR30183(
1809; UNROLL-NO-VF-NEXT:  entry:
1810; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -2
1811; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
1812; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
1813; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
1814; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1815; UNROLL-NO-VF:       vector.ph:
1816; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
1817; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
1818; UNROLL-NO-VF-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
1819; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
1820; UNROLL-NO-VF:       vector.body:
1821; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1822; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD:%.*]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
1823; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
1824; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
1825; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 2
1826; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[INDUCTION]], 2
1827; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[INDUCTION1]], 2
1828; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
1829; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
1830; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
1831; UNROLL-NO-VF-NEXT:    [[TMP8]] = load i32, i32* [[TMP6]], align 4
1832; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1833; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1834; UNROLL-NO-VF-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
1835; UNROLL-NO-VF:       middle.block:
1836; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1837; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1838; UNROLL-NO-VF:       scalar.ph:
1839; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
1840; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1841; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
1842; UNROLL-NO-VF:       scalar.body:
1843; UNROLL-NO-VF-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1844; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
1845; UNROLL-NO-VF-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
1846; UNROLL-NO-VF-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]]
1847; UNROLL-NO-VF-NEXT:    [[VAR2]] = load i32, i32* [[VAR1]], align 4
1848; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
1849; UNROLL-NO-VF-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
1850; UNROLL-NO-VF:       for.end:
1851; UNROLL-NO-VF-NEXT:    ret void
1852;
1853; SINK-AFTER-LABEL: @PR30183(
1854; SINK-AFTER-NEXT:  entry:
1855; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -2
1856; SINK-AFTER-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
1857; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
1858; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
1859; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1860; SINK-AFTER:       vector.ph:
1861; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
1862; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
1863; SINK-AFTER-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
1864; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD:%.*]], i32 3
1865; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
1866; SINK-AFTER:       vector.body:
1867; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1868; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
1869; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
1870; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
1871; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2
1872; SINK-AFTER-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
1873; SINK-AFTER-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6
1874; SINK-AFTER-NEXT:    [[TMP7:%.*]] = add nuw nsw i64 [[TMP3]], 2
1875; SINK-AFTER-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 2
1876; SINK-AFTER-NEXT:    [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2
1877; SINK-AFTER-NEXT:    [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 2
1878; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP7]]
1879; SINK-AFTER-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]]
1880; SINK-AFTER-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]]
1881; SINK-AFTER-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]]
1882; SINK-AFTER-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4
1883; SINK-AFTER-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP12]], align 4
1884; SINK-AFTER-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP13]], align 4
1885; SINK-AFTER-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP14]], align 4
1886; SINK-AFTER-NEXT:    [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0
1887; SINK-AFTER-NEXT:    [[TMP20:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP16]], i32 1
1888; SINK-AFTER-NEXT:    [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP17]], i32 2
1889; SINK-AFTER-NEXT:    [[TMP22]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP18]], i32 3
1890; SINK-AFTER-NEXT:    [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1891; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1892; SINK-AFTER-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1893; SINK-AFTER-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
1894; SINK-AFTER:       middle.block:
1895; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1896; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3
1897; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP22]], i32 2
1898; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1899; SINK-AFTER:       scalar.ph:
1900; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
1901; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1902; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
1903; SINK-AFTER:       scalar.body:
1904; SINK-AFTER-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1905; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
1906; SINK-AFTER-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
1907; SINK-AFTER-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]]
1908; SINK-AFTER-NEXT:    [[VAR2]] = load i32, i32* [[VAR1]], align 4
1909; SINK-AFTER-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
1910; SINK-AFTER-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
1911; SINK-AFTER:       for.end:
1912; SINK-AFTER-NEXT:    ret void
1913;
1914entry:
1915  br label %scalar.body
1916
1917scalar.body:
1918  %i = phi i64 [ 0, %entry ], [ %i.next, %scalar.body ]
1919  %var0 = phi i32 [ %pre_load, %entry ], [ %var2, %scalar.body ]
1920  %i.next = add nuw nsw i64 %i, 2
1921  %var1 = getelementptr inbounds i32, i32* %a, i64 %i.next
1922  %var2 = load i32, i32* %var1
1923  %cond = icmp eq i64 %i.next,%n
1924  br i1 %cond, label %for.end, label %scalar.body
1925
1926for.end:
1927  ret void
1928}
1929
1930; Test a first-order recurrence whose update value is built only from constants.
; The recurrence phi %var2 starts at 0 and is fed by %var3 = add i64 0, 1. In
; the width-4 runs that skip -instcombine, the expected vector recurrence phi
; receives the constant splat <1, 1, 1, 1> on the backedge. The exit compare
; is against undef, and %var2 has no user inside this function.
1931define void @constant_folded_previous_value() {
1932; CHECK-LABEL: @constant_folded_previous_value(
1933; CHECK-NEXT:  entry:
1934; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1935; CHECK:       vector.ph:
1936; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1937; CHECK:       vector.body:
1938; CHECK-NEXT:    br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1939; CHECK:       middle.block:
1940; CHECK-NEXT:    br i1 undef, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1941; CHECK:       scalar.ph:
1942; CHECK-NEXT:    br label [[SCALAR_BODY:%.*]]
1943; CHECK:       scalar.body:
1944; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
1945; CHECK:       for.end:
1946; CHECK-NEXT:    ret void
1947;
1948; UNROLL-LABEL: @constant_folded_previous_value(
1949; UNROLL-NEXT:  entry:
1950; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1951; UNROLL:       vector.ph:
1952; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
1953; UNROLL:       vector.body:
1954; UNROLL-NEXT:    br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1955; UNROLL:       middle.block:
1956; UNROLL-NEXT:    br i1 undef, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1957; UNROLL:       scalar.ph:
1958; UNROLL-NEXT:    br label [[SCALAR_BODY:%.*]]
1959; UNROLL:       scalar.body:
1960; UNROLL-NEXT:    br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
1961; UNROLL:       for.end:
1962; UNROLL-NEXT:    ret void
1963;
1964; UNROLL-NO-IC-LABEL: @constant_folded_previous_value(
1965; UNROLL-NO-IC-NEXT:  entry:
1966; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1967; UNROLL-NO-IC:       vector.ph:
1968; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
1969; UNROLL-NO-IC:       vector.body:
1970; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1971; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ <i64 1, i64 1, i64 1, i64 1>, [[VECTOR_BODY]] ]
1972; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1973; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1974; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
1975; UNROLL-NO-IC-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
1976; UNROLL-NO-IC:       middle.block:
1977; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
1978; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1979; UNROLL-NO-IC:       scalar.ph:
1980; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 1, [[MIDDLE_BLOCK]] ]
1981; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1982; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
1983; UNROLL-NO-IC:       scalar.body:
1984; UNROLL-NO-IC-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1985; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
1986; UNROLL-NO-IC-NEXT:    [[VAR3]] = add i64 0, 1
1987; UNROLL-NO-IC-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1988; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef
1989; UNROLL-NO-IC-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
1990; UNROLL-NO-IC:       for.end:
1991; UNROLL-NO-IC-NEXT:    ret void
1992;
1993; UNROLL-NO-VF-LABEL: @constant_folded_previous_value(
1994; UNROLL-NO-VF-NEXT:  entry:
1995; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1996; UNROLL-NO-VF:       vector.ph:
1997; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
1998; UNROLL-NO-VF:       vector.body:
1999; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2000; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
2001; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i64 0, 1
2002; UNROLL-NO-VF-NEXT:    [[TMP1]] = add i64 0, 1
2003; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2004; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
2005; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
2006; UNROLL-NO-VF:       middle.block:
2007; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
2008; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2009; UNROLL-NO-VF:       scalar.ph:
2010; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
2011; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2012; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
2013; UNROLL-NO-VF:       scalar.body:
2014; UNROLL-NO-VF-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
2015; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
2016; UNROLL-NO-VF-NEXT:    [[VAR3]] = add i64 0, 1
2017; UNROLL-NO-VF-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
2018; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef
2019; UNROLL-NO-VF-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
2020; UNROLL-NO-VF:       for.end:
2021; UNROLL-NO-VF-NEXT:    ret void
2022;
2023; SINK-AFTER-LABEL: @constant_folded_previous_value(
2024; SINK-AFTER-NEXT:  entry:
2025; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2026; SINK-AFTER:       vector.ph:
2027; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
2028; SINK-AFTER:       vector.body:
2029; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2030; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ <i64 1, i64 1, i64 1, i64 1>, [[VECTOR_BODY]] ]
2031; SINK-AFTER-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2032; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2033; SINK-AFTER-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
2034; SINK-AFTER-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
2035; SINK-AFTER:       middle.block:
2036; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
2037; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2038; SINK-AFTER:       scalar.ph:
2039; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 1, [[MIDDLE_BLOCK]] ]
2040; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2041; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
2042; SINK-AFTER:       scalar.body:
2043; SINK-AFTER-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
2044; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
2045; SINK-AFTER-NEXT:    [[VAR3]] = add i64 0, 1
2046; SINK-AFTER-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
2047; SINK-AFTER-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef
2048; SINK-AFTER-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
2049; SINK-AFTER:       for.end:
2050; SINK-AFTER-NEXT:    ret void
2051;
2052entry:
2053  br label %scalar.body
2054
2055scalar.body:
2056  %i = phi i64 [ 0, %entry ], [ %i.next, %scalar.body ]
  ; First-order recurrence phi, 0 on entry and the previous iteration's %var3
  ; afterwards. It has no user in this function.
2057  %var2 = phi i64 [ 0, %entry ], [ %var3, %scalar.body ]
  ; Both operands are constants, so this value is the same in every iteration.
2058  %var3 = add i64 0, 1
2059  %i.next = add nuw nsw i64 %i, 1
  ; The loop bound is undef, so the trip count is not a known value.
2060  %cond = icmp eq i64 %i.next, undef
2061  br i1 %cond, label %for.end, label %scalar.body
2062
2063for.end:
2064  ret void
2065}
2066
2067; We vectorize this first order recurrence, by generating two
2068; extracts for the phi `val.phi` - one at the last index and
2069; another at the second last index. We need these 2 extracts because
2070; the first order recurrence phi is used outside the loop, so we require the phi
2071; itself and not its update (addx).
2072; Also check the case where the loop is unrolled (interleaved) but not vectorized.
2073define i32 @extract_second_last_iteration(i32* %cval, i32 %x)  {
2074; CHECK-LABEL: @extract_second_last_iteration(
2075; CHECK-NEXT:  entry:
2076; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2077; CHECK:       vector.ph:
2078; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2079; CHECK:       vector.body:
2080; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2081; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ 2, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
2082; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2083; CHECK-NEXT:    [[TMP1]] = add i32 [[TMP0]], 4
2084; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
2085; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
2086; CHECK:       middle.block:
2087; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = add i32 [[TMP0]], [[X:%.*]]
2088; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
2089; CHECK:       scalar.ph:
2090; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
2091; CHECK:       for.body:
2092; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
2093; CHECK:       for.end:
2094; CHECK-NEXT:    [[VAL_PHI_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
2095; CHECK-NEXT:    ret i32 [[VAL_PHI_LCSSA]]
2096;
2097; UNROLL-LABEL: @extract_second_last_iteration(
2098; UNROLL-NEXT:  entry:
2099; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2100; UNROLL:       vector.ph:
2101; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
2102; UNROLL:       vector.body:
2103; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2104; UNROLL-NEXT:    [[TMP0:%.*]] = phi i32 [ 2, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
2105; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
2106; UNROLL-NEXT:    [[TMP1]] = add i32 [[TMP0]], 8
2107; UNROLL-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
2108; UNROLL-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
2109; UNROLL:       middle.block:
2110; UNROLL-NEXT:    [[TMP3:%.*]] = add i32 [[TMP0]], 4
2111; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = add i32 [[TMP3]], [[X:%.*]]
2112; UNROLL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
2113; UNROLL:       scalar.ph:
2114; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
2115; UNROLL:       for.body:
2116; UNROLL-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
2117; UNROLL:       for.end:
2118; UNROLL-NEXT:    [[VAL_PHI_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
2119; UNROLL-NEXT:    ret i32 [[VAL_PHI_LCSSA]]
2120;
2121; UNROLL-NO-IC-LABEL: @extract_second_last_iteration(
2122; UNROLL-NO-IC-NEXT:  entry:
2123; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2124; UNROLL-NO-IC:       vector.ph:
2125; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
2126; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
2127; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
2128; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
2129; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
2130; UNROLL-NO-IC:       vector.body:
2131; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2132; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2133; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
2134; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
2135; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
2136; UNROLL-NO-IC-NEXT:    [[TMP11]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT3]]
2137; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP10]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2138; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[TMP10]], <4 x i32> [[TMP11]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2139; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
2140; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 4, i32 4, i32 4, i32 4>
2141; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
2142; UNROLL-NO-IC-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
2143; UNROLL-NO-IC:       middle.block:
2144; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 96, 96
2145; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP11]], i32 3
2146; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP11]], i32 2
2147; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2148; UNROLL-NO-IC:       scalar.ph:
2149; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2150; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2151; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
2152; UNROLL-NO-IC:       for.body:
2153; UNROLL-NO-IC-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
2154; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ]
2155; UNROLL-NO-IC-NEXT:    [[INC]] = add i32 [[INC_PHI]], 1
2156; UNROLL-NO-IC-NEXT:    [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
2157; UNROLL-NO-IC-NEXT:    [[ADDX]] = add i32 [[INC_PHI]], [[X]]
2158; UNROLL-NO-IC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
2159; UNROLL-NO-IC-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
2160; UNROLL-NO-IC:       for.end:
2161; UNROLL-NO-IC-NEXT:    [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
2162; UNROLL-NO-IC-NEXT:    ret i32 [[VAL_PHI_LCSSA]]
2163;
2164; UNROLL-NO-VF-LABEL: @extract_second_last_iteration(
2165; UNROLL-NO-VF-NEXT:  entry:
2166; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2167; UNROLL-NO-VF:       vector.ph:
2168; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
2169; UNROLL-NO-VF:       vector.body:
2170; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2171; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
2172; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i32 [[INDEX]], 0
2173; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1
2174; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add i32 [[INDUCTION]], [[X:%.*]]
2175; UNROLL-NO-VF-NEXT:    [[TMP3]] = add i32 [[INDUCTION1]], [[X]]
2176; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
2177; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
2178; UNROLL-NO-VF-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
2179; UNROLL-NO-VF:       middle.block:
2180; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 96, 96
2181; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2182; UNROLL-NO-VF:       scalar.ph:
2183; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP3]], [[MIDDLE_BLOCK]] ]
2184; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2185; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
2186; UNROLL-NO-VF:       for.body:
2187; UNROLL-NO-VF-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
2188; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ]
2189; UNROLL-NO-VF-NEXT:    [[INC]] = add i32 [[INC_PHI]], 1
2190; UNROLL-NO-VF-NEXT:    [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
2191; UNROLL-NO-VF-NEXT:    [[ADDX]] = add i32 [[INC_PHI]], [[X]]
2192; UNROLL-NO-VF-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
2193; UNROLL-NO-VF-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
2194; UNROLL-NO-VF:       for.end:
2195; UNROLL-NO-VF-NEXT:    [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[TMP2]], [[MIDDLE_BLOCK]] ]
2196; UNROLL-NO-VF-NEXT:    ret i32 [[VAL_PHI_LCSSA]]
2197;
2198; SINK-AFTER-LABEL: @extract_second_last_iteration(
2199; SINK-AFTER-NEXT:  entry:
2200; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2201; SINK-AFTER:       vector.ph:
2202; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
2203; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
2204; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
2205; SINK-AFTER:       vector.body:
2206; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2207; SINK-AFTER-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2208; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
2209; SINK-AFTER-NEXT:    [[TMP5]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
2210; SINK-AFTER-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP5]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2211; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2212; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
2213; SINK-AFTER-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
2214; SINK-AFTER-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
2215; SINK-AFTER:       middle.block:
2216; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i32 96, 96
2217; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP5]], i32 3
2218; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP5]], i32 2
2219; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2220; SINK-AFTER:       scalar.ph:
2221; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2222; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2223; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
2224; SINK-AFTER:       for.body:
2225; SINK-AFTER-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
2226; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ]
2227; SINK-AFTER-NEXT:    [[INC]] = add i32 [[INC_PHI]], 1
2228; SINK-AFTER-NEXT:    [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
2229; SINK-AFTER-NEXT:    [[ADDX]] = add i32 [[INC_PHI]], [[X]]
2230; SINK-AFTER-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
2231; SINK-AFTER-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
2232; SINK-AFTER:       for.end:
2233; SINK-AFTER-NEXT:    [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
2234; SINK-AFTER-NEXT:    ret i32 [[VAL_PHI_LCSSA]]
2235;
2236entry:
2237  br label %for.body
2238
2239for.body:
  ; Induction variable, starting at 0 and stepping by 1.
2240  %inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  ; First-order recurrence phi, 0 on entry and the previous iteration's %addx
  ; afterwards.
2241  %val.phi = phi i32 [ 0, %entry ], [ %addx, %for.body ]
2242  %inc = add i32 %inc.phi, 1
  ; %bc has no user in this function.
2243  %bc = zext i32 %inc.phi to i64
2244  %addx = add i32 %inc.phi, %x
  ; Exit when %inc.phi reaches 95, i.e. after 96 iterations.
2245  %cmp = icmp eq i32 %inc.phi, 95
2246  br i1 %cmp, label %for.end, label %for.body
2247
2248for.end:
  ; The recurrence phi itself, not its update %addx, is the value used outside
  ; the loop.
2249  ret i32 %val.phi
2250}
2251
2252; We vectorize this first-order recurrence with a chain of insertelements for
2253; each unrolled part. Make sure these insertelements are generated in order:
2254; the shuffle for the first-order recurrence is added after the insertelement
2255; of the last part (UF - 1), on the assumption that this insertelement appears
2256; after the insertelements of all other parts.
2257;
2258; int PR33613(double *b, double j, int d) {
2259;   int a = 0;
2260;   for(int i = 0; i < 10240; i++, b+=25) {
2261;     double f = b[d]; // Scalarize to form insertelements
2262;     if (j * f)
2263;       a++;
2264;     j = f;
2265;   }
2266;   return a;
2267; }
2268;
2269;
2270define i32 @PR33613(double* %b, double %j, i32 %d) {
2271; CHECK-LABEL: @PR33613(
2272; CHECK-NEXT:  entry:
2273; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
2274; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2275; CHECK:       vector.ph:
2276; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i64 3
2277; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2278; CHECK:       vector.body:
2279; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2280; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ]
2281; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
2282; CHECK-NEXT:    [[TMP0:%.*]] = mul i64 [[INDEX]], 25
2283; CHECK-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[B:%.*]], i64 [[IDXPROM]]
2284; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 1
2285; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 25
2286; CHECK-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2287; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 2
2288; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 25
2289; CHECK-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2290; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[INDEX]], 3
2291; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 25
2292; CHECK-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2293; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[TMP0]]
2294; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[TMP2]]
2295; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[TMP4]]
2296; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[TMP6]]
2297; CHECK-NEXT:    [[TMP11:%.*]] = load double, double* [[TMP7]], align 8
2298; CHECK-NEXT:    [[TMP12:%.*]] = load double, double* [[TMP8]], align 8
2299; CHECK-NEXT:    [[TMP13:%.*]] = load double, double* [[TMP9]], align 8
2300; CHECK-NEXT:    [[TMP14:%.*]] = load double, double* [[TMP10]], align 8
2301; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x double> poison, double [[TMP11]], i64 0
2302; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x double> [[TMP15]], double [[TMP12]], i64 1
2303; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x double> [[TMP16]], double [[TMP13]], i64 2
2304; CHECK-NEXT:    [[TMP18]] = insertelement <4 x double> [[TMP17]], double [[TMP14]], i64 3
2305; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP17]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2306; CHECK-NEXT:    [[TMP20:%.*]] = fmul <4 x double> [[TMP19]], [[TMP18]]
2307; CHECK-NEXT:    [[TMP21:%.*]] = fcmp une <4 x double> [[TMP20]], zeroinitializer
2308; CHECK-NEXT:    [[TMP22:%.*]] = zext <4 x i1> [[TMP21]] to <4 x i32>
2309; CHECK-NEXT:    [[TMP23]] = add <4 x i32> [[VEC_PHI]], [[TMP22]]
2310; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2311; CHECK-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
2312; CHECK-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2313; CHECK:       middle.block:
2314; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP23]])
2315; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
2316; CHECK:       scalar.ph:
2317; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
2318; CHECK:       for.cond.cleanup:
2319; CHECK-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ]
2320; CHECK-NEXT:    ret i32 [[A_1_LCSSA]]
2321; CHECK:       for.body:
2322; CHECK-NEXT:    br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
2323;
2324; UNROLL-LABEL: @PR33613(
2325; UNROLL-NEXT:  entry:
2326; UNROLL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
2327; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2328; UNROLL:       vector.ph:
2329; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i64 3
2330; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
2331; UNROLL:       vector.body:
2332; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2333; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[VECTOR_BODY]] ]
2334; UNROLL-NEXT:    [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[VECTOR_BODY]] ]
2335; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ]
2336; UNROLL-NEXT:    [[TMP0:%.*]] = mul i64 [[INDEX]], 25
2337; UNROLL-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[B:%.*]], i64 [[IDXPROM]]
2338; UNROLL-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 1
2339; UNROLL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 25
2340; UNROLL-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2341; UNROLL-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 2
2342; UNROLL-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 25
2343; UNROLL-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2344; UNROLL-NEXT:    [[TMP5:%.*]] = or i64 [[INDEX]], 3
2345; UNROLL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 25
2346; UNROLL-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2347; UNROLL-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 4
2348; UNROLL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 25
2349; UNROLL-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2350; UNROLL-NEXT:    [[TMP9:%.*]] = or i64 [[INDEX]], 5
2351; UNROLL-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 25
2352; UNROLL-NEXT:    [[NEXT_GEP6:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2353; UNROLL-NEXT:    [[TMP11:%.*]] = or i64 [[INDEX]], 6
2354; UNROLL-NEXT:    [[TMP12:%.*]] = mul i64 [[TMP11]], 25
2355; UNROLL-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2356; UNROLL-NEXT:    [[TMP13:%.*]] = or i64 [[INDEX]], 7
2357; UNROLL-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], 25
2358; UNROLL-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2359; UNROLL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[TMP0]]
2360; UNROLL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[TMP2]]
2361; UNROLL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[TMP4]]
2362; UNROLL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[TMP6]]
2363; UNROLL-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP5]], i64 [[TMP8]]
2364; UNROLL-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP6]], i64 [[TMP10]]
2365; UNROLL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP7]], i64 [[TMP12]]
2366; UNROLL-NEXT:    [[TMP22:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP8]], i64 [[TMP14]]
2367; UNROLL-NEXT:    [[TMP23:%.*]] = load double, double* [[TMP15]], align 8
2368; UNROLL-NEXT:    [[TMP24:%.*]] = load double, double* [[TMP16]], align 8
2369; UNROLL-NEXT:    [[TMP25:%.*]] = load double, double* [[TMP17]], align 8
2370; UNROLL-NEXT:    [[TMP26:%.*]] = load double, double* [[TMP18]], align 8
2371; UNROLL-NEXT:    [[TMP27:%.*]] = insertelement <4 x double> poison, double [[TMP23]], i64 0
2372; UNROLL-NEXT:    [[TMP28:%.*]] = insertelement <4 x double> [[TMP27]], double [[TMP24]], i64 1
2373; UNROLL-NEXT:    [[TMP29:%.*]] = insertelement <4 x double> [[TMP28]], double [[TMP25]], i64 2
2374; UNROLL-NEXT:    [[TMP30:%.*]] = insertelement <4 x double> [[TMP29]], double [[TMP26]], i64 3
2375; UNROLL-NEXT:    [[TMP31:%.*]] = load double, double* [[TMP19]], align 8
2376; UNROLL-NEXT:    [[TMP32:%.*]] = load double, double* [[TMP20]], align 8
2377; UNROLL-NEXT:    [[TMP33:%.*]] = load double, double* [[TMP21]], align 8
2378; UNROLL-NEXT:    [[TMP34:%.*]] = load double, double* [[TMP22]], align 8
2379; UNROLL-NEXT:    [[TMP35:%.*]] = insertelement <4 x double> poison, double [[TMP31]], i64 0
2380; UNROLL-NEXT:    [[TMP36:%.*]] = insertelement <4 x double> [[TMP35]], double [[TMP32]], i64 1
2381; UNROLL-NEXT:    [[TMP37:%.*]] = insertelement <4 x double> [[TMP36]], double [[TMP33]], i64 2
2382; UNROLL-NEXT:    [[TMP38]] = insertelement <4 x double> [[TMP37]], double [[TMP34]], i64 3
2383; UNROLL-NEXT:    [[TMP39:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP29]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2384; UNROLL-NEXT:    [[TMP40:%.*]] = shufflevector <4 x double> [[TMP30]], <4 x double> [[TMP37]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2385; UNROLL-NEXT:    [[TMP41:%.*]] = fmul <4 x double> [[TMP39]], [[TMP30]]
2386; UNROLL-NEXT:    [[TMP42:%.*]] = fmul <4 x double> [[TMP40]], [[TMP38]]
2387; UNROLL-NEXT:    [[TMP43:%.*]] = fcmp une <4 x double> [[TMP41]], zeroinitializer
2388; UNROLL-NEXT:    [[TMP44:%.*]] = fcmp une <4 x double> [[TMP42]], zeroinitializer
2389; UNROLL-NEXT:    [[TMP45:%.*]] = zext <4 x i1> [[TMP43]] to <4 x i32>
2390; UNROLL-NEXT:    [[TMP46:%.*]] = zext <4 x i1> [[TMP44]] to <4 x i32>
2391; UNROLL-NEXT:    [[TMP47]] = add <4 x i32> [[VEC_PHI]], [[TMP45]]
2392; UNROLL-NEXT:    [[TMP48]] = add <4 x i32> [[VEC_PHI9]], [[TMP46]]
2393; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2394; UNROLL-NEXT:    [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
2395; UNROLL-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2396; UNROLL:       middle.block:
2397; UNROLL-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP48]], [[TMP47]]
2398; UNROLL-NEXT:    [[TMP50:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
2399; UNROLL-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
2400; UNROLL:       scalar.ph:
2401; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
2402; UNROLL:       for.cond.cleanup:
2403; UNROLL-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ]
2404; UNROLL-NEXT:    ret i32 [[A_1_LCSSA]]
2405; UNROLL:       for.body:
2406; UNROLL-NEXT:    br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
2407;
2408; UNROLL-NO-IC-LABEL: @PR33613(
2409; UNROLL-NO-IC-NEXT:  entry:
2410; UNROLL-NO-IC-NEXT:    [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
2411; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2412; UNROLL-NO-IC:       vector.ph:
2413; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = getelementptr double, double* [[B:%.*]], i64 256000
2414; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i32 3
2415; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
2416; UNROLL-NO-IC:       vector.body:
2417; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2418; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[VECTOR_BODY]] ]
2419; UNROLL-NO-IC-NEXT:    [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP51:%.*]], [[VECTOR_BODY]] ]
2420; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[VECTOR_BODY]] ]
2421; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2422; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 25
2423; UNROLL-NO-IC-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]]
2424; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
2425; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 25
2426; UNROLL-NO-IC-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]]
2427; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 2
2428; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 25
2429; UNROLL-NO-IC-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[TMP5]]
2430; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 3
2431; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 25
2432; UNROLL-NO-IC-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[TMP7]]
2433; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 4
2434; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 25
2435; UNROLL-NO-IC-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr double, double* [[B]], i64 [[TMP9]]
2436; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 5
2437; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 25
2438; UNROLL-NO-IC-NEXT:    [[NEXT_GEP6:%.*]] = getelementptr double, double* [[B]], i64 [[TMP11]]
2439; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 6
2440; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 25
2441; UNROLL-NO-IC-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr double, double* [[B]], i64 [[TMP13]]
2442; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 7
2443; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = mul i64 [[TMP14]], 25
2444; UNROLL-NO-IC-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr double, double* [[B]], i64 [[TMP15]]
2445; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]]
2446; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]]
2447; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]]
2448; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[IDXPROM]]
2449; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP5]], i64 [[IDXPROM]]
2450; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP6]], i64 [[IDXPROM]]
2451; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP7]], i64 [[IDXPROM]]
2452; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP8]], i64 [[IDXPROM]]
2453; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = load double, double* [[TMP18]], align 8
2454; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = load double, double* [[TMP19]], align 8
2455; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = load double, double* [[TMP20]], align 8
2456; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = load double, double* [[TMP21]], align 8
2457; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = insertelement <4 x double> poison, double [[TMP26]], i32 0
2458; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = insertelement <4 x double> [[TMP30]], double [[TMP27]], i32 1
2459; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = insertelement <4 x double> [[TMP31]], double [[TMP28]], i32 2
2460; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = insertelement <4 x double> [[TMP32]], double [[TMP29]], i32 3
2461; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = load double, double* [[TMP22]], align 8
2462; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = load double, double* [[TMP23]], align 8
2463; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = load double, double* [[TMP24]], align 8
2464; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = load double, double* [[TMP25]], align 8
2465; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = insertelement <4 x double> poison, double [[TMP34]], i32 0
2466; UNROLL-NO-IC-NEXT:    [[TMP39:%.*]] = insertelement <4 x double> [[TMP38]], double [[TMP35]], i32 1
2467; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = insertelement <4 x double> [[TMP39]], double [[TMP36]], i32 2
2468; UNROLL-NO-IC-NEXT:    [[TMP41]] = insertelement <4 x double> [[TMP40]], double [[TMP37]], i32 3
2469; UNROLL-NO-IC-NEXT:    [[TMP42:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP33]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2470; UNROLL-NO-IC-NEXT:    [[TMP43:%.*]] = shufflevector <4 x double> [[TMP33]], <4 x double> [[TMP41]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2471; UNROLL-NO-IC-NEXT:    [[TMP44:%.*]] = fmul <4 x double> [[TMP42]], [[TMP33]]
2472; UNROLL-NO-IC-NEXT:    [[TMP45:%.*]] = fmul <4 x double> [[TMP43]], [[TMP41]]
2473; UNROLL-NO-IC-NEXT:    [[TMP46:%.*]] = fcmp une <4 x double> [[TMP44]], zeroinitializer
2474; UNROLL-NO-IC-NEXT:    [[TMP47:%.*]] = fcmp une <4 x double> [[TMP45]], zeroinitializer
2475; UNROLL-NO-IC-NEXT:    [[TMP48:%.*]] = zext <4 x i1> [[TMP46]] to <4 x i32>
2476; UNROLL-NO-IC-NEXT:    [[TMP49:%.*]] = zext <4 x i1> [[TMP47]] to <4 x i32>
2477; UNROLL-NO-IC-NEXT:    [[TMP50]] = add <4 x i32> [[VEC_PHI]], [[TMP48]]
2478; UNROLL-NO-IC-NEXT:    [[TMP51]] = add <4 x i32> [[VEC_PHI9]], [[TMP49]]
2479; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2480; UNROLL-NO-IC-NEXT:    [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
2481; UNROLL-NO-IC-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2482; UNROLL-NO-IC:       middle.block:
2483; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP51]], [[TMP50]]
2484; UNROLL-NO-IC-NEXT:    [[TMP53:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
2485; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10240, 10240
2486; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP41]], i32 3
2487; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[TMP41]], i32 2
2488; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
2489; UNROLL-NO-IC:       scalar.ph:
2490; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2491; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
2492; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2493; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP53]], [[MIDDLE_BLOCK]] ]
2494; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
2495; UNROLL-NO-IC:       for.cond.cleanup:
2496; UNROLL-NO-IC-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP53]], [[MIDDLE_BLOCK]] ]
2497; UNROLL-NO-IC-NEXT:    ret i32 [[A_1_LCSSA]]
2498; UNROLL-NO-IC:       for.body:
2499; UNROLL-NO-IC-NEXT:    [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
2500; UNROLL-NO-IC-NEXT:    [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
2501; UNROLL-NO-IC-NEXT:    [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
2502; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP54:%.*]], [[FOR_BODY]] ]
2503; UNROLL-NO-IC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]]
2504; UNROLL-NO-IC-NEXT:    [[TMP54]] = load double, double* [[ARRAYIDX]], align 8
2505; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP54]]
2506; UNROLL-NO-IC-NEXT:    [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
2507; UNROLL-NO-IC-NEXT:    [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
2508; UNROLL-NO-IC-NEXT:    [[A_1]] = add nsw i32 [[A_010]], [[INC]]
2509; UNROLL-NO-IC-NEXT:    [[INC1]] = add nuw nsw i32 [[I_011]], 1
2510; UNROLL-NO-IC-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25
2511; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
2512; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
2513;
2514; UNROLL-NO-VF-LABEL: @PR33613(
2515; UNROLL-NO-VF-NEXT:  entry:
2516; UNROLL-NO-VF-NEXT:    [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
2517; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2518; UNROLL-NO-VF:       vector.ph:
2519; UNROLL-NO-VF-NEXT:    [[IND_END:%.*]] = getelementptr double, double* [[B:%.*]], i64 256000
2520; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
2521; UNROLL-NO-VF:       vector.body:
2522; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2523; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
2524; UNROLL-NO-VF-NEXT:    [[VEC_PHI4:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
2525; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi double [ [[J:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
2526; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2527; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 25
2528; UNROLL-NO-VF-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]]
2529; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
2530; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 25
2531; UNROLL-NO-VF-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]]
2532; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]]
2533; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]]
2534; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = load double, double* [[TMP4]], align 8
2535; UNROLL-NO-VF-NEXT:    [[TMP7]] = load double, double* [[TMP5]], align 8
2536; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = fmul double [[VECTOR_RECUR]], [[TMP6]]
2537; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = fmul double [[TMP6]], [[TMP7]]
2538; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = fcmp une double [[TMP8]], 0.000000e+00
2539; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = fcmp une double [[TMP9]], 0.000000e+00
2540; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = zext i1 [[TMP10]] to i32
2541; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = zext i1 [[TMP11]] to i32
2542; UNROLL-NO-VF-NEXT:    [[TMP14]] = add i32 [[VEC_PHI]], [[TMP12]]
2543; UNROLL-NO-VF-NEXT:    [[TMP15]] = add i32 [[VEC_PHI4]], [[TMP13]]
2544; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2545; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
2546; UNROLL-NO-VF-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP23:![0-9]+]]
2547; UNROLL-NO-VF:       middle.block:
2548; UNROLL-NO-VF-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP14]]
2549; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10240, 10240
2550; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
2551; UNROLL-NO-VF:       scalar.ph:
2552; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
2553; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
2554; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2555; UNROLL-NO-VF-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
2556; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
2557; UNROLL-NO-VF:       for.cond.cleanup:
2558; UNROLL-NO-VF-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
2559; UNROLL-NO-VF-NEXT:    ret i32 [[A_1_LCSSA]]
2560; UNROLL-NO-VF:       for.body:
2561; UNROLL-NO-VF-NEXT:    [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
2562; UNROLL-NO-VF-NEXT:    [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
2563; UNROLL-NO-VF-NEXT:    [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
2564; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ]
2565; UNROLL-NO-VF-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]]
2566; UNROLL-NO-VF-NEXT:    [[TMP17]] = load double, double* [[ARRAYIDX]], align 8
2567; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP17]]
2568; UNROLL-NO-VF-NEXT:    [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
2569; UNROLL-NO-VF-NEXT:    [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
2570; UNROLL-NO-VF-NEXT:    [[A_1]] = add nsw i32 [[A_010]], [[INC]]
2571; UNROLL-NO-VF-NEXT:    [[INC1]] = add nuw nsw i32 [[I_011]], 1
2572; UNROLL-NO-VF-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25
2573; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
2574; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2575;
2576; SINK-AFTER-LABEL: @PR33613(
2577; SINK-AFTER-NEXT:  entry:
2578; SINK-AFTER-NEXT:    [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
2579; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2580; SINK-AFTER:       vector.ph:
2581; SINK-AFTER-NEXT:    [[IND_END:%.*]] = getelementptr double, double* [[B:%.*]], i64 256000
2582; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i32 3
2583; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
2584; SINK-AFTER:       vector.body:
2585; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2586; SINK-AFTER-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP25:%.*]], [[VECTOR_BODY]] ]
2587; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
2588; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2589; SINK-AFTER-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 25
2590; SINK-AFTER-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]]
2591; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
2592; SINK-AFTER-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 25
2593; SINK-AFTER-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]]
2594; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 2
2595; SINK-AFTER-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 25
2596; SINK-AFTER-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[TMP5]]
2597; SINK-AFTER-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 3
2598; SINK-AFTER-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 25
2599; SINK-AFTER-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[TMP7]]
2600; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]]
2601; SINK-AFTER-NEXT:    [[TMP10:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]]
2602; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]]
2603; SINK-AFTER-NEXT:    [[TMP12:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[IDXPROM]]
2604; SINK-AFTER-NEXT:    [[TMP13:%.*]] = load double, double* [[TMP9]], align 8
2605; SINK-AFTER-NEXT:    [[TMP14:%.*]] = load double, double* [[TMP10]], align 8
2606; SINK-AFTER-NEXT:    [[TMP15:%.*]] = load double, double* [[TMP11]], align 8
2607; SINK-AFTER-NEXT:    [[TMP16:%.*]] = load double, double* [[TMP12]], align 8
2608; SINK-AFTER-NEXT:    [[TMP17:%.*]] = insertelement <4 x double> poison, double [[TMP13]], i32 0
2609; SINK-AFTER-NEXT:    [[TMP18:%.*]] = insertelement <4 x double> [[TMP17]], double [[TMP14]], i32 1
2610; SINK-AFTER-NEXT:    [[TMP19:%.*]] = insertelement <4 x double> [[TMP18]], double [[TMP15]], i32 2
2611; SINK-AFTER-NEXT:    [[TMP20]] = insertelement <4 x double> [[TMP19]], double [[TMP16]], i32 3
2612; SINK-AFTER-NEXT:    [[TMP21:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP20]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2613; SINK-AFTER-NEXT:    [[TMP22:%.*]] = fmul <4 x double> [[TMP21]], [[TMP20]]
2614; SINK-AFTER-NEXT:    [[TMP23:%.*]] = fcmp une <4 x double> [[TMP22]], zeroinitializer
2615; SINK-AFTER-NEXT:    [[TMP24:%.*]] = zext <4 x i1> [[TMP23]] to <4 x i32>
2616; SINK-AFTER-NEXT:    [[TMP25]] = add <4 x i32> [[VEC_PHI]], [[TMP24]]
2617; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2618; SINK-AFTER-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
2619; SINK-AFTER-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP24:![0-9]+]]
2620; SINK-AFTER:       middle.block:
2621; SINK-AFTER-NEXT:    [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
2622; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10240, 10240
2623; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP20]], i32 3
2624; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[TMP20]], i32 2
2625; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
2626; SINK-AFTER:       scalar.ph:
2627; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2628; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
2629; SINK-AFTER-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2630; SINK-AFTER-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
2631; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
2632; SINK-AFTER:       for.cond.cleanup:
2633; SINK-AFTER-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
2634; SINK-AFTER-NEXT:    ret i32 [[A_1_LCSSA]]
2635; SINK-AFTER:       for.body:
2636; SINK-AFTER-NEXT:    [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
2637; SINK-AFTER-NEXT:    [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
2638; SINK-AFTER-NEXT:    [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
2639; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ]
2640; SINK-AFTER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]]
2641; SINK-AFTER-NEXT:    [[TMP28]] = load double, double* [[ARRAYIDX]], align 8
2642; SINK-AFTER-NEXT:    [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP28]]
2643; SINK-AFTER-NEXT:    [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
2644; SINK-AFTER-NEXT:    [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
2645; SINK-AFTER-NEXT:    [[A_1]] = add nsw i32 [[A_010]], [[INC]]
2646; SINK-AFTER-NEXT:    [[INC1]] = add nuw nsw i32 [[I_011]], 1
2647; SINK-AFTER-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25
2648; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
2649; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP25:![0-9]+]]
2650;
2651entry:
2652  %idxprom = sext i32 %d to i64
2653  br label %for.body
2654
2655for.cond.cleanup:
2656  %a.1.lcssa = phi i32 [ %a.1, %for.body ]
2657  ret i32 %a.1.lcssa
2658
2659for.body:
2660  %b.addr.012 = phi double* [ %b, %entry ], [ %add.ptr, %for.body ]
2661  %i.011 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]
2662  %a.010 = phi i32 [ 0, %entry ], [ %a.1, %for.body ]
2663  %j.addr.09 = phi double [ %j, %entry ], [ %0, %for.body ]
2664  %arrayidx = getelementptr inbounds double, double* %b.addr.012, i64 %idxprom
2665  %0 = load double, double* %arrayidx, align 8
2666  %mul = fmul double %j.addr.09, %0
2667  %tobool = fcmp une double %mul, 0.000000e+00
2668  %inc = zext i1 %tobool to i32
2669  %a.1 = add nsw i32 %a.010, %inc
2670  %inc1 = add nuw nsw i32 %i.011, 1
2671  %add.ptr = getelementptr inbounds double, double* %b.addr.012, i64 25
2672  %exitcond = icmp eq i32 %inc1, 10240
2673  br i1 %exitcond, label %for.cond.cleanup, label %for.body
2674}
2675
2676; void sink_after(short *a, int *b, int n) {
2677;   for(int i = 0; i < n; i++)
2678;     b[i] = (a[i] * a[i + 1]);
2679; }
2680;
2681; Check that the sext sank after the load in the vector loop.
2682;
2683define void @sink_after(i16* %a, i32* %b, i64 %n) {
2684; CHECK-LABEL: @sink_after(
2685; CHECK-NEXT:  entry:
2686; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
2687; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
2688; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
2689; CHECK:       vector.memcheck:
2690; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]]
2691; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
2692; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
2693; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
2694; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[SCEVGEP5]] to i32*
2695; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt i32* [[TMP1]], [[B]]
2696; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[SCEVGEP]] to i16*
2697; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP3]], [[TMP2]]
2698; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
2699; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
2700; CHECK:       vector.ph:
2701; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -4
2702; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
2703; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2704; CHECK:       vector.body:
2705; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2706; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
2707; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 1
2708; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
2709; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
2710; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2
2711; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2712; CHECK-NEXT:    [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
2713; CHECK-NEXT:    [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
2714; CHECK-NEXT:    [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP7]]
2715; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
2716; CHECK-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
2717; CHECK-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* [[TMP11]], align 4
2718; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2719; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2720; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
2721; CHECK:       middle.block:
2722; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
2723; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
2724; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2725; CHECK:       scalar.ph:
2726; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2727; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
2728; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
2729; CHECK:       for.body:
2730; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ]
2731; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
2732; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
2733; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
2734; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
2735; CHECK-NEXT:    [[TMP13]] = load i16, i16* [[ARRAYIDX2]], align 2
2736; CHECK-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP13]] to i32
2737; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
2738; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
2739; CHECK-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
2740; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
2741; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
2742; CHECK:       for.end:
2743; CHECK-NEXT:    ret void
2744;
2745; UNROLL-LABEL: @sink_after(
2746; UNROLL-NEXT:  entry:
2747; UNROLL-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
2748; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
2749; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
2750; UNROLL:       vector.memcheck:
2751; UNROLL-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]]
2752; UNROLL-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
2753; UNROLL-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
2754; UNROLL-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
2755; UNROLL-NEXT:    [[TMP1:%.*]] = bitcast i16* [[SCEVGEP5]] to i32*
2756; UNROLL-NEXT:    [[BOUND0:%.*]] = icmp ugt i32* [[TMP1]], [[B]]
2757; UNROLL-NEXT:    [[TMP2:%.*]] = bitcast i32* [[SCEVGEP]] to i16*
2758; UNROLL-NEXT:    [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP3]], [[TMP2]]
2759; UNROLL-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
2760; UNROLL-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
2761; UNROLL:       vector.ph:
2762; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -8
2763; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
2764; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
2765; UNROLL:       vector.body:
2766; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2767; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ]
2768; UNROLL-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 1
2769; UNROLL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
2770; UNROLL-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
2771; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2
2772; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i64 4
2773; UNROLL-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>*
2774; UNROLL-NEXT:    [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2
2775; UNROLL-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2776; UNROLL-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2777; UNROLL-NEXT:    [[TMP10:%.*]] = sext <4 x i16> [[TMP8]] to <4 x i32>
2778; UNROLL-NEXT:    [[TMP11:%.*]] = sext <4 x i16> [[TMP9]] to <4 x i32>
2779; UNROLL-NEXT:    [[TMP12:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
2780; UNROLL-NEXT:    [[TMP13:%.*]] = sext <4 x i16> [[WIDE_LOAD7]] to <4 x i32>
2781; UNROLL-NEXT:    [[TMP14:%.*]] = mul nsw <4 x i32> [[TMP12]], [[TMP10]]
2782; UNROLL-NEXT:    [[TMP15:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP11]]
2783; UNROLL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
2784; UNROLL-NEXT:    [[TMP17:%.*]] = bitcast i32* [[TMP16]] to <4 x i32>*
2785; UNROLL-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP17]], align 4
2786; UNROLL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP16]], i64 4
2787; UNROLL-NEXT:    [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
2788; UNROLL-NEXT:    store <4 x i32> [[TMP15]], <4 x i32>* [[TMP19]], align 4
2789; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2790; UNROLL-NEXT:    [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2791; UNROLL-NEXT:    br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
2792; UNROLL:       middle.block:
2793; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
2794; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i64 3
2795; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2796; UNROLL:       scalar.ph:
2797; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2798; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
2799; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
2800; UNROLL:       for.body:
2801; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP21:%.*]], [[FOR_BODY]] ]
2802; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
2803; UNROLL-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
2804; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
2805; UNROLL-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
2806; UNROLL-NEXT:    [[TMP21]] = load i16, i16* [[ARRAYIDX2]], align 2
2807; UNROLL-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP21]] to i32
2808; UNROLL-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
2809; UNROLL-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
2810; UNROLL-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
2811; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
2812; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
2813; UNROLL:       for.end:
2814; UNROLL-NEXT:    ret void
2815;
2816; UNROLL-NO-IC-LABEL: @sink_after(
2817; UNROLL-NO-IC-NEXT:  entry:
2818; UNROLL-NO-IC-NEXT:    [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
2819; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
2820; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
2821; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
2822; UNROLL-NO-IC:       vector.memcheck:
2823; UNROLL-NO-IC-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
2824; UNROLL-NO-IC-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
2825; UNROLL-NO-IC-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
2826; UNROLL-NO-IC-NEXT:    [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
2827; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
2828; UNROLL-NO-IC-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
2829; UNROLL-NO-IC-NEXT:    [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
2830; UNROLL-NO-IC-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
2831; UNROLL-NO-IC-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
2832; UNROLL-NO-IC-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
2833; UNROLL-NO-IC-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
2834; UNROLL-NO-IC:       vector.ph:
2835; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
2836; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
2837; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
2838; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
2839; UNROLL-NO-IC:       vector.body:
2840; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2841; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ]
2842; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
2843; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 4
2844; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1
2845; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[TMP2]], 1
2846; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
2847; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]]
2848; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
2849; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <4 x i16>*
2850; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP8]], align 2
2851; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 4
2852; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
2853; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2
2854; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2855; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2856; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32>
2857; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = sext <4 x i16> [[TMP12]] to <4 x i32>
2858; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
2859; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = sext <4 x i16> [[WIDE_LOAD7]] to <4 x i32>
2860; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP13]]
2861; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = mul nsw <4 x i32> [[TMP16]], [[TMP14]]
2862; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
2863; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
2864; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 0
2865; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
2866; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP17]], <4 x i32>* [[TMP22]], align 4
2867; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP19]], i32 4
2868; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
2869; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP18]], <4 x i32>* [[TMP24]], align 4
2870; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2871; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2872; UNROLL-NO-IC-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
2873; UNROLL-NO-IC:       middle.block:
2874; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
2875; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 3
2876; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 2
2877; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2878; UNROLL-NO-IC:       scalar.ph:
2879; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2880; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
2881; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
2882; UNROLL-NO-IC:       for.body:
2883; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP26:%.*]], [[FOR_BODY]] ]
2884; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
2885; UNROLL-NO-IC-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
2886; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
2887; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
2888; UNROLL-NO-IC-NEXT:    [[TMP26]] = load i16, i16* [[ARRAYIDX2]], align 2
2889; UNROLL-NO-IC-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP26]] to i32
2890; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
2891; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
2892; UNROLL-NO-IC-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
2893; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
2894; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
2895; UNROLL-NO-IC:       for.end:
2896; UNROLL-NO-IC-NEXT:    ret void
2897;
2898; UNROLL-NO-VF-LABEL: @sink_after(
2899; UNROLL-NO-VF-NEXT:  entry:
2900; UNROLL-NO-VF-NEXT:    [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
2901; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
2902; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
2903; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
2904; UNROLL-NO-VF:       vector.memcheck:
2905; UNROLL-NO-VF-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
2906; UNROLL-NO-VF-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
2907; UNROLL-NO-VF-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
2908; UNROLL-NO-VF-NEXT:    [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
2909; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
2910; UNROLL-NO-VF-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
2911; UNROLL-NO-VF-NEXT:    [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
2912; UNROLL-NO-VF-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
2913; UNROLL-NO-VF-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
2914; UNROLL-NO-VF-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
2915; UNROLL-NO-VF-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
2916; UNROLL-NO-VF:       vector.ph:
2917; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
2918; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
2919; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
2920; UNROLL-NO-VF:       vector.body:
2921; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2922; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
2923; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
2924; UNROLL-NO-VF-NEXT:    [[INDUCTION7:%.*]] = add i64 [[INDEX]], 1
2925; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[INDUCTION]], 1
2926; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[INDUCTION7]], 1
2927; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]]
2928; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
2929; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 2
2930; UNROLL-NO-VF-NEXT:    [[TMP6]] = load i16, i16* [[TMP4]], align 2
2931; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
2932; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sext i16 [[TMP5]] to i32
2933; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = sext i16 [[TMP5]] to i32
2934; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = sext i16 [[TMP6]] to i32
2935; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = mul nsw i32 [[TMP9]], [[TMP7]]
2936; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = mul nsw i32 [[TMP10]], [[TMP8]]
2937; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]]
2938; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION7]]
2939; UNROLL-NO-VF-NEXT:    store i32 [[TMP11]], i32* [[TMP13]], align 4
2940; UNROLL-NO-VF-NEXT:    store i32 [[TMP12]], i32* [[TMP14]], align 4
2941; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2942; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2943; UNROLL-NO-VF-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
2944; UNROLL-NO-VF:       middle.block:
2945; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
2946; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2947; UNROLL-NO-VF:       scalar.ph:
2948; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
2949; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
2950; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
2951; UNROLL-NO-VF:       for.body:
2952; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP16:%.*]], [[FOR_BODY]] ]
2953; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
2954; UNROLL-NO-VF-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
2955; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
2956; UNROLL-NO-VF-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
2957; UNROLL-NO-VF-NEXT:    [[TMP16]] = load i16, i16* [[ARRAYIDX2]], align 2
2958; UNROLL-NO-VF-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP16]] to i32
2959; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
2960; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
2961; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
2962; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
2963; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
2964; UNROLL-NO-VF:       for.end:
2965; UNROLL-NO-VF-NEXT:    ret void
2966;
2967; SINK-AFTER-LABEL: @sink_after(
2968; SINK-AFTER-NEXT:  entry:
2969; SINK-AFTER-NEXT:    [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
2970; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
2971; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
2972; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
2973; SINK-AFTER:       vector.memcheck:
2974; SINK-AFTER-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
2975; SINK-AFTER-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
2976; SINK-AFTER-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
2977; SINK-AFTER-NEXT:    [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
2978; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
2979; SINK-AFTER-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
2980; SINK-AFTER-NEXT:    [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
2981; SINK-AFTER-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
2982; SINK-AFTER-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
2983; SINK-AFTER-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
2984; SINK-AFTER-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
2985; SINK-AFTER:       vector.ph:
2986; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
2987; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
2988; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
2989; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
2990; SINK-AFTER:       vector.body:
2991; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2992; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
2993; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
2994; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
2995; SINK-AFTER-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
2996; SINK-AFTER-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
2997; SINK-AFTER-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
2998; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2
2999; SINK-AFTER-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3000; SINK-AFTER-NEXT:    [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
3001; SINK-AFTER-NEXT:    [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
3002; SINK-AFTER-NEXT:    [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP7]]
3003; SINK-AFTER-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
3004; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0
3005; SINK-AFTER-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
3006; SINK-AFTER-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* [[TMP12]], align 4
3007; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
3008; SINK-AFTER-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3009; SINK-AFTER-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP31:![0-9]+]]
3010; SINK-AFTER:       middle.block:
3011; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3012; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
3013; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
3014; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3015; SINK-AFTER:       scalar.ph:
3016; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3017; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
3018; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
3019; SINK-AFTER:       for.body:
3020; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ]
3021; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3022; SINK-AFTER-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3023; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3024; SINK-AFTER-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
3025; SINK-AFTER-NEXT:    [[TMP14]] = load i16, i16* [[ARRAYIDX2]], align 2
3026; SINK-AFTER-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP14]] to i32
3027; SINK-AFTER-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
3028; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3029; SINK-AFTER-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3030; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3031; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP32:![0-9]+]]
3032; SINK-AFTER:       for.end:
3033; SINK-AFTER-NEXT:    ret void
3034;
; Input loop that the assertion blocks above are generated from.
;   %a  i16 pointer, read at index 0 before the loop and at iv + 1 inside it.
;   %b  i32 pointer, written at index iv.
;   %n  i64 bound; the loop is bottom-tested, so the body runs at least once
;       and exits when iv + 1 equals %n.
3035entry:
  ; Initial value of the recurrence: a[0], loaded once ahead of the loop.
3036  %.pre = load i16, i16* %a
3037  br label %for.body
3038
3039for.body:
  ; %0 carries the i16 loaded by the previous iteration (%.pre on the first).
3040  %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
3041  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; This sext of the recurrence phi sits before the load that produces the next
  ; recurrence value. In the expected vector.body above, the sext of the
  ; shuffled recurrence appears after the wide load instead.
3042  %conv = sext i16 %0 to i32
3043  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
3044  %arrayidx2 = getelementptr inbounds i16, i16* %a, i64 %indvars.iv.next
  ; Loads a[iv + 1]; %1 feeds the phi %0 on the back edge.
3045  %1 = load i16, i16* %arrayidx2
3046  %conv3 = sext i16 %1 to i32
  ; Product of the current and previous loaded values, stored to b[iv].
3047  %mul = mul nsw i32 %conv3, %conv
3048  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
3049  store i32 %mul, i32* %arrayidx5
  ; Exit test compares the already-incremented induction variable with %n.
3050  %exitcond = icmp eq i64 %indvars.iv.next, %n
3051  br i1 %exitcond, label %for.end, label %for.body
3052
3053for.end:
3054  ret void
3055}
3056
3057; PR34711: given three consecutive instructions such that the first will be
3058; widened, the second is a cast that will be widened and needs to sink after the
3059; third, and the third is a first-order-recurring load that will be replicated
3060; instead of widened. Although the cast and the first instruction will both be
3061; widened, and are originally adjacent to each other, make sure the replicated
3062; load ends up appearing between them.
3063;
3064; void PR34711(short (*a)[2], int *b, int *c, int n) {
3065;   for(int i = 0; i < n; i++) {
3066;     c[i] = 7;
3067;     b[i] = (a[i][0] * a[i][1]);
3068;   }
3069; }
3070;
3071; Check that the sext sank after the load in the vector loop.
3072;
3073define void @PR34711([2 x i16]* %a, i32* %b, i32* %c, i64 %n) {
3074; CHECK-LABEL: @PR34711(
3075; CHECK-NEXT:  entry:
3076; CHECK-NEXT:    [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
3077; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
3078; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
3079; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
3080; CHECK:       vector.memcheck:
3081; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[C:%.*]], i64 [[N]]
3082; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]]
3083; CHECK-NEXT:    [[SCEVGEP6:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 0, i64 1
3084; CHECK-NEXT:    [[SCEVGEP8:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 [[N]], i64 0
3085; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[C]]
3086; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]]
3087; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
3088; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[SCEVGEP8]] to i32*
3089; CHECK-NEXT:    [[BOUND010:%.*]] = icmp ugt i32* [[TMP0]], [[C]]
3090; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i32* [[SCEVGEP]] to i16*
3091; CHECK-NEXT:    [[BOUND111:%.*]] = icmp ult i16* [[SCEVGEP6]], [[TMP1]]
3092; CHECK-NEXT:    [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
3093; CHECK-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]]
3094; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[SCEVGEP8]] to i32*
3095; CHECK-NEXT:    [[BOUND013:%.*]] = icmp ugt i32* [[TMP2]], [[B]]
3096; CHECK-NEXT:    [[TMP3:%.*]] = bitcast i32* [[SCEVGEP4]] to i16*
3097; CHECK-NEXT:    [[BOUND114:%.*]] = icmp ult i16* [[SCEVGEP6]], [[TMP3]]
3098; CHECK-NEXT:    [[FOUND_CONFLICT15:%.*]] = and i1 [[BOUND013]], [[BOUND114]]
3099; CHECK-NEXT:    [[CONFLICT_RDX16:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT15]]
3100; CHECK-NEXT:    br i1 [[CONFLICT_RDX16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3101; CHECK:       vector.ph:
3102; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -4
3103; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
3104; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
3105; CHECK:       vector.body:
3106; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3107; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[VECTOR_BODY]] ]
3108; CHECK-NEXT:    [[TMP4:%.*]] = or i64 [[INDEX]], 1
3109; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[INDEX]], 2
3110; CHECK-NEXT:    [[TMP6:%.*]] = or i64 [[INDEX]], 3
3111; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]]
3112; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDEX]], i64 1
3113; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1
3114; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1
3115; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1
3116; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
3117; CHECK-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP12]], align 4
3118; CHECK-NEXT:    [[TMP13:%.*]] = load i16, i16* [[TMP8]], align 2
3119; CHECK-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP9]], align 2
3120; CHECK-NEXT:    [[TMP15:%.*]] = load i16, i16* [[TMP10]], align 2
3121; CHECK-NEXT:    [[TMP16:%.*]] = load i16, i16* [[TMP11]], align 2
3122; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x i16> poison, i16 [[TMP13]], i64 0
3123; CHECK-NEXT:    [[TMP18:%.*]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP14]], i64 1
3124; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i16> [[TMP18]], i16 [[TMP15]], i64 2
3125; CHECK-NEXT:    [[TMP20]] = insertelement <4 x i16> [[TMP19]], i16 [[TMP16]], i64 3
3126; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP19]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3127; CHECK-NEXT:    [[TMP22:%.*]] = sext <4 x i16> [[TMP21]] to <4 x i32>
3128; CHECK-NEXT:    [[TMP23:%.*]] = sext <4 x i16> [[TMP20]] to <4 x i32>
3129; CHECK-NEXT:    [[TMP24:%.*]] = mul nsw <4 x i32> [[TMP23]], [[TMP22]]
3130; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
3131; CHECK-NEXT:    [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <4 x i32>*
3132; CHECK-NEXT:    store <4 x i32> [[TMP24]], <4 x i32>* [[TMP26]], align 4
3133; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
3134; CHECK-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3135; CHECK-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
3136; CHECK:       middle.block:
3137; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
3138; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3139; CHECK:       scalar.ph:
3140; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP16]], [[MIDDLE_BLOCK]] ]
3141; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
3142; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
3143; CHECK:       for.body:
3144; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ]
3145; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3146; CHECK-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
3147; CHECK-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
3148; CHECK-NEXT:    store i32 7, i32* [[ARRAYCIDX]], align 4
3149; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3150; CHECK-NEXT:    [[TMP28]] = load i16, i16* [[CUR_INDEX]], align 2
3151; CHECK-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP28]] to i32
3152; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
3153; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3154; CHECK-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3155; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3156; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3157; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
3158; CHECK:       for.end:
3159; CHECK-NEXT:    ret void
3160;
3161; UNROLL-LABEL: @PR34711(
3162; UNROLL-NEXT:  entry:
3163; UNROLL-NEXT:    [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
3164; UNROLL-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
3165; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
3166; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
3167; UNROLL:       vector.memcheck:
3168; UNROLL-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[C:%.*]], i64 [[N]]
3169; UNROLL-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]]
3170; UNROLL-NEXT:    [[SCEVGEP6:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 0, i64 1
3171; UNROLL-NEXT:    [[SCEVGEP8:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 [[N]], i64 0
3172; UNROLL-NEXT:    [[BOUND0:%.*]] = icmp ugt i32* [[SCEVGEP4]], [[C]]
3173; UNROLL-NEXT:    [[BOUND1:%.*]] = icmp ugt i32* [[SCEVGEP]], [[B]]
3174; UNROLL-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
3175; UNROLL-NEXT:    [[TMP0:%.*]] = bitcast i16* [[SCEVGEP8]] to i32*
3176; UNROLL-NEXT:    [[BOUND010:%.*]] = icmp ugt i32* [[TMP0]], [[C]]
3177; UNROLL-NEXT:    [[TMP1:%.*]] = bitcast i32* [[SCEVGEP]] to i16*
3178; UNROLL-NEXT:    [[BOUND111:%.*]] = icmp ult i16* [[SCEVGEP6]], [[TMP1]]
3179; UNROLL-NEXT:    [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
3180; UNROLL-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]]
3181; UNROLL-NEXT:    [[TMP2:%.*]] = bitcast i16* [[SCEVGEP8]] to i32*
3182; UNROLL-NEXT:    [[BOUND013:%.*]] = icmp ugt i32* [[TMP2]], [[B]]
3183; UNROLL-NEXT:    [[TMP3:%.*]] = bitcast i32* [[SCEVGEP4]] to i16*
3184; UNROLL-NEXT:    [[BOUND114:%.*]] = icmp ult i16* [[SCEVGEP6]], [[TMP3]]
3185; UNROLL-NEXT:    [[FOUND_CONFLICT15:%.*]] = and i1 [[BOUND013]], [[BOUND114]]
3186; UNROLL-NEXT:    [[CONFLICT_RDX16:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT15]]
3187; UNROLL-NEXT:    br i1 [[CONFLICT_RDX16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3188; UNROLL:       vector.ph:
3189; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -8
3190; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
3191; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
3192; UNROLL:       vector.body:
3193; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3194; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ]
3195; UNROLL-NEXT:    [[TMP4:%.*]] = or i64 [[INDEX]], 1
3196; UNROLL-NEXT:    [[TMP5:%.*]] = or i64 [[INDEX]], 2
3197; UNROLL-NEXT:    [[TMP6:%.*]] = or i64 [[INDEX]], 3
3198; UNROLL-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 4
3199; UNROLL-NEXT:    [[TMP8:%.*]] = or i64 [[INDEX]], 5
3200; UNROLL-NEXT:    [[TMP9:%.*]] = or i64 [[INDEX]], 6
3201; UNROLL-NEXT:    [[TMP10:%.*]] = or i64 [[INDEX]], 7
3202; UNROLL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDEX]]
3203; UNROLL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDEX]], i64 1
3204; UNROLL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1
3205; UNROLL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1
3206; UNROLL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1
3207; UNROLL-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP7]], i64 1
3208; UNROLL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP8]], i64 1
3209; UNROLL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP9]], i64 1
3210; UNROLL-NEXT:    [[TMP19:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP10]], i64 1
3211; UNROLL-NEXT:    [[TMP20:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
3212; UNROLL-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP20]], align 4
3213; UNROLL-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i64 4
3214; UNROLL-NEXT:    [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
3215; UNROLL-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP22]], align 4
3216; UNROLL-NEXT:    [[TMP23:%.*]] = load i16, i16* [[TMP12]], align 2
3217; UNROLL-NEXT:    [[TMP24:%.*]] = load i16, i16* [[TMP13]], align 2
3218; UNROLL-NEXT:    [[TMP25:%.*]] = load i16, i16* [[TMP14]], align 2
3219; UNROLL-NEXT:    [[TMP26:%.*]] = load i16, i16* [[TMP15]], align 2
3220; UNROLL-NEXT:    [[TMP27:%.*]] = insertelement <4 x i16> poison, i16 [[TMP23]], i64 0
3221; UNROLL-NEXT:    [[TMP28:%.*]] = insertelement <4 x i16> [[TMP27]], i16 [[TMP24]], i64 1
3222; UNROLL-NEXT:    [[TMP29:%.*]] = insertelement <4 x i16> [[TMP28]], i16 [[TMP25]], i64 2
3223; UNROLL-NEXT:    [[TMP30:%.*]] = insertelement <4 x i16> [[TMP29]], i16 [[TMP26]], i64 3
3224; UNROLL-NEXT:    [[TMP31:%.*]] = load i16, i16* [[TMP16]], align 2
3225; UNROLL-NEXT:    [[TMP32:%.*]] = load i16, i16* [[TMP17]], align 2
3226; UNROLL-NEXT:    [[TMP33:%.*]] = load i16, i16* [[TMP18]], align 2
3227; UNROLL-NEXT:    [[TMP34:%.*]] = load i16, i16* [[TMP19]], align 2
3228; UNROLL-NEXT:    [[TMP35:%.*]] = insertelement <4 x i16> poison, i16 [[TMP31]], i64 0
3229; UNROLL-NEXT:    [[TMP36:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP32]], i64 1
3230; UNROLL-NEXT:    [[TMP37:%.*]] = insertelement <4 x i16> [[TMP36]], i16 [[TMP33]], i64 2
3231; UNROLL-NEXT:    [[TMP38]] = insertelement <4 x i16> [[TMP37]], i16 [[TMP34]], i64 3
3232; UNROLL-NEXT:    [[TMP39:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP29]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3233; UNROLL-NEXT:    [[TMP40:%.*]] = shufflevector <4 x i16> [[TMP30]], <4 x i16> [[TMP37]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3234; UNROLL-NEXT:    [[TMP41:%.*]] = sext <4 x i16> [[TMP39]] to <4 x i32>
3235; UNROLL-NEXT:    [[TMP42:%.*]] = sext <4 x i16> [[TMP40]] to <4 x i32>
3236; UNROLL-NEXT:    [[TMP43:%.*]] = sext <4 x i16> [[TMP30]] to <4 x i32>
3237; UNROLL-NEXT:    [[TMP44:%.*]] = sext <4 x i16> [[TMP38]] to <4 x i32>
3238; UNROLL-NEXT:    [[TMP45:%.*]] = mul nsw <4 x i32> [[TMP43]], [[TMP41]]
3239; UNROLL-NEXT:    [[TMP46:%.*]] = mul nsw <4 x i32> [[TMP44]], [[TMP42]]
3240; UNROLL-NEXT:    [[TMP47:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
3241; UNROLL-NEXT:    [[TMP48:%.*]] = bitcast i32* [[TMP47]] to <4 x i32>*
3242; UNROLL-NEXT:    store <4 x i32> [[TMP45]], <4 x i32>* [[TMP48]], align 4
3243; UNROLL-NEXT:    [[TMP49:%.*]] = getelementptr inbounds i32, i32* [[TMP47]], i64 4
3244; UNROLL-NEXT:    [[TMP50:%.*]] = bitcast i32* [[TMP49]] to <4 x i32>*
3245; UNROLL-NEXT:    store <4 x i32> [[TMP46]], <4 x i32>* [[TMP50]], align 4
3246; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
3247; UNROLL-NEXT:    [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3248; UNROLL-NEXT:    br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
3249; UNROLL:       middle.block:
3250; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
3251; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3252; UNROLL:       scalar.ph:
3253; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP34]], [[MIDDLE_BLOCK]] ]
3254; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
3255; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
3256; UNROLL:       for.body:
3257; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP52:%.*]], [[FOR_BODY]] ]
3258; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3259; UNROLL-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
3260; UNROLL-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
3261; UNROLL-NEXT:    store i32 7, i32* [[ARRAYCIDX]], align 4
3262; UNROLL-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3263; UNROLL-NEXT:    [[TMP52]] = load i16, i16* [[CUR_INDEX]], align 2
3264; UNROLL-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP52]] to i32
3265; UNROLL-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
3266; UNROLL-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3267; UNROLL-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3268; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3269; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3270; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
3271; UNROLL:       for.end:
3272; UNROLL-NEXT:    ret void
3273;
3274; UNROLL-NO-IC-LABEL: @PR34711(
3275; UNROLL-NO-IC-NEXT:  entry:
3276; UNROLL-NO-IC-NEXT:    [[C1:%.*]] = bitcast i32* [[C:%.*]] to i8*
3277; UNROLL-NO-IC-NEXT:    [[B3:%.*]] = bitcast i32* [[B:%.*]] to i8*
3278; UNROLL-NO-IC-NEXT:    [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
3279; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
3280; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
3281; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
3282; UNROLL-NO-IC:       vector.memcheck:
3283; UNROLL-NO-IC-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[C]], i64 [[N]]
3284; UNROLL-NO-IC-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
3285; UNROLL-NO-IC-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
3286; UNROLL-NO-IC-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
3287; UNROLL-NO-IC-NEXT:    [[SCEVGEP6:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 0, i64 1
3288; UNROLL-NO-IC-NEXT:    [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
3289; UNROLL-NO-IC-NEXT:    [[SCEVGEP8:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 [[N]], i64 0
3290; UNROLL-NO-IC-NEXT:    [[SCEVGEP89:%.*]] = bitcast i16* [[SCEVGEP8]] to i8*
3291; UNROLL-NO-IC-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP45]]
3292; UNROLL-NO-IC-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP2]]
3293; UNROLL-NO-IC-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
3294; UNROLL-NO-IC-NEXT:    [[BOUND010:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP89]]
3295; UNROLL-NO-IC-NEXT:    [[BOUND111:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP2]]
3296; UNROLL-NO-IC-NEXT:    [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
3297; UNROLL-NO-IC-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]]
3298; UNROLL-NO-IC-NEXT:    [[BOUND013:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP89]]
3299; UNROLL-NO-IC-NEXT:    [[BOUND114:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP45]]
3300; UNROLL-NO-IC-NEXT:    [[FOUND_CONFLICT15:%.*]] = and i1 [[BOUND013]], [[BOUND114]]
3301; UNROLL-NO-IC-NEXT:    [[CONFLICT_RDX16:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT15]]
3302; UNROLL-NO-IC-NEXT:    br i1 [[CONFLICT_RDX16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3303; UNROLL-NO-IC:       vector.ph:
3304; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
3305; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3306; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
3307; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
3308; UNROLL-NO-IC:       vector.body:
3309; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3310; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ]
3311; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
3312; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
3313; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
3314; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
3315; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
3316; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
3317; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
3318; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
3319; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP0]]
3320; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]]
3321; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1
3322; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1
3323; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1
3324; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1
3325; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1
3326; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1
3327; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1
3328; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP7]], i64 1
3329; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
3330; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
3331; UNROLL-NO-IC-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP19]], align 4
3332; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 4
3333; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>*
3334; UNROLL-NO-IC-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP21]], align 4
3335; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = load i16, i16* [[TMP10]], align 2
3336; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = load i16, i16* [[TMP11]], align 2
3337; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = load i16, i16* [[TMP12]], align 2
3338; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = load i16, i16* [[TMP13]], align 2
3339; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = insertelement <4 x i16> poison, i16 [[TMP22]], i32 0
3340; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP23]], i32 1
3341; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = insertelement <4 x i16> [[TMP27]], i16 [[TMP24]], i32 2
3342; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = insertelement <4 x i16> [[TMP28]], i16 [[TMP25]], i32 3
3343; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = load i16, i16* [[TMP14]], align 2
3344; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = load i16, i16* [[TMP15]], align 2
3345; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = load i16, i16* [[TMP16]], align 2
3346; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = load i16, i16* [[TMP17]], align 2
3347; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = insertelement <4 x i16> poison, i16 [[TMP30]], i32 0
3348; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = insertelement <4 x i16> [[TMP34]], i16 [[TMP31]], i32 1
3349; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP32]], i32 2
3350; UNROLL-NO-IC-NEXT:    [[TMP37]] = insertelement <4 x i16> [[TMP36]], i16 [[TMP33]], i32 3
3351; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP29]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3352; UNROLL-NO-IC-NEXT:    [[TMP39:%.*]] = shufflevector <4 x i16> [[TMP29]], <4 x i16> [[TMP37]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3353; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = sext <4 x i16> [[TMP38]] to <4 x i32>
3354; UNROLL-NO-IC-NEXT:    [[TMP41:%.*]] = sext <4 x i16> [[TMP39]] to <4 x i32>
3355; UNROLL-NO-IC-NEXT:    [[TMP42:%.*]] = sext <4 x i16> [[TMP29]] to <4 x i32>
3356; UNROLL-NO-IC-NEXT:    [[TMP43:%.*]] = sext <4 x i16> [[TMP37]] to <4 x i32>
3357; UNROLL-NO-IC-NEXT:    [[TMP44:%.*]] = mul nsw <4 x i32> [[TMP42]], [[TMP40]]
3358; UNROLL-NO-IC-NEXT:    [[TMP45:%.*]] = mul nsw <4 x i32> [[TMP43]], [[TMP41]]
3359; UNROLL-NO-IC-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
3360; UNROLL-NO-IC-NEXT:    [[TMP47:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]]
3361; UNROLL-NO-IC-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[TMP46]], i32 0
3362; UNROLL-NO-IC-NEXT:    [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
3363; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP44]], <4 x i32>* [[TMP49]], align 4
3364; UNROLL-NO-IC-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i32, i32* [[TMP46]], i32 4
3365; UNROLL-NO-IC-NEXT:    [[TMP51:%.*]] = bitcast i32* [[TMP50]] to <4 x i32>*
3366; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP45]], <4 x i32>* [[TMP51]], align 4
3367; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
3368; UNROLL-NO-IC-NEXT:    [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3369; UNROLL-NO-IC-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
3370; UNROLL-NO-IC:       middle.block:
3371; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3372; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP37]], i32 3
3373; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP37]], i32 2
3374; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3375; UNROLL-NO-IC:       scalar.ph:
3376; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3377; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
3378; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
3379; UNROLL-NO-IC:       for.body:
3380; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP53:%.*]], [[FOR_BODY]] ]
3381; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3382; UNROLL-NO-IC-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
3383; UNROLL-NO-IC-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
3384; UNROLL-NO-IC-NEXT:    store i32 7, i32* [[ARRAYCIDX]], align 4
3385; UNROLL-NO-IC-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3386; UNROLL-NO-IC-NEXT:    [[TMP53]] = load i16, i16* [[CUR_INDEX]], align 2
3387; UNROLL-NO-IC-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP53]] to i32
3388; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
3389; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3390; UNROLL-NO-IC-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3391; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3392; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3393; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
3394; UNROLL-NO-IC:       for.end:
3395; UNROLL-NO-IC-NEXT:    ret void
3396;
3397; UNROLL-NO-VF-LABEL: @PR34711(
3398; UNROLL-NO-VF-NEXT:  entry:
3399; UNROLL-NO-VF-NEXT:    [[C1:%.*]] = bitcast i32* [[C:%.*]] to i8*
3400; UNROLL-NO-VF-NEXT:    [[B3:%.*]] = bitcast i32* [[B:%.*]] to i8*
3401; UNROLL-NO-VF-NEXT:    [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
3402; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
3403; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
3404; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
3405; UNROLL-NO-VF:       vector.memcheck:
3406; UNROLL-NO-VF-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[C]], i64 [[N]]
3407; UNROLL-NO-VF-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
3408; UNROLL-NO-VF-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
3409; UNROLL-NO-VF-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
3410; UNROLL-NO-VF-NEXT:    [[SCEVGEP6:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 0, i64 1
3411; UNROLL-NO-VF-NEXT:    [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
3412; UNROLL-NO-VF-NEXT:    [[SCEVGEP8:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 [[N]], i64 0
3413; UNROLL-NO-VF-NEXT:    [[SCEVGEP89:%.*]] = bitcast i16* [[SCEVGEP8]] to i8*
3414; UNROLL-NO-VF-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP45]]
3415; UNROLL-NO-VF-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP2]]
3416; UNROLL-NO-VF-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
3417; UNROLL-NO-VF-NEXT:    [[BOUND010:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP89]]
3418; UNROLL-NO-VF-NEXT:    [[BOUND111:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP2]]
3419; UNROLL-NO-VF-NEXT:    [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
3420; UNROLL-NO-VF-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]]
3421; UNROLL-NO-VF-NEXT:    [[BOUND013:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP89]]
3422; UNROLL-NO-VF-NEXT:    [[BOUND114:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP45]]
3423; UNROLL-NO-VF-NEXT:    [[FOUND_CONFLICT15:%.*]] = and i1 [[BOUND013]], [[BOUND114]]
3424; UNROLL-NO-VF-NEXT:    [[CONFLICT_RDX16:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT15]]
3425; UNROLL-NO-VF-NEXT:    br i1 [[CONFLICT_RDX16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3426; UNROLL-NO-VF:       vector.ph:
3427; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
3428; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3429; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
3430; UNROLL-NO-VF:       vector.body:
3431; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3432; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
3433; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
3434; UNROLL-NO-VF-NEXT:    [[INDUCTION17:%.*]] = add i64 [[INDEX]], 1
3435; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION]]
3436; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION17]]
3437; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION]], i64 1
3438; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION17]], i64 1
3439; UNROLL-NO-VF-NEXT:    store i32 7, i32* [[TMP0]], align 4
3440; UNROLL-NO-VF-NEXT:    store i32 7, i32* [[TMP1]], align 4
3441; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2
3442; UNROLL-NO-VF-NEXT:    [[TMP5]] = load i16, i16* [[TMP3]], align 2
3443; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
3444; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP4]] to i32
3445; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sext i16 [[TMP4]] to i32
3446; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = sext i16 [[TMP5]] to i32
3447; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = mul nsw i32 [[TMP8]], [[TMP6]]
3448; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = mul nsw i32 [[TMP9]], [[TMP7]]
3449; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]]
3450; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION17]]
3451; UNROLL-NO-VF-NEXT:    store i32 [[TMP10]], i32* [[TMP12]], align 4
3452; UNROLL-NO-VF-NEXT:    store i32 [[TMP11]], i32* [[TMP13]], align 4
3453; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
3454; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3455; UNROLL-NO-VF-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP40:![0-9]+]]
3456; UNROLL-NO-VF:       middle.block:
3457; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3458; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3459; UNROLL-NO-VF:       scalar.ph:
3460; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
3461; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
3462; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
3463; UNROLL-NO-VF:       for.body:
3464; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ]
3465; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3466; UNROLL-NO-VF-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
3467; UNROLL-NO-VF-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
3468; UNROLL-NO-VF-NEXT:    store i32 7, i32* [[ARRAYCIDX]], align 4
3469; UNROLL-NO-VF-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3470; UNROLL-NO-VF-NEXT:    [[TMP15]] = load i16, i16* [[CUR_INDEX]], align 2
3471; UNROLL-NO-VF-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP15]] to i32
3472; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
3473; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3474; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3475; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3476; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3477; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
3478; UNROLL-NO-VF:       for.end:
3479; UNROLL-NO-VF-NEXT:    ret void
3480;
3481; SINK-AFTER-LABEL: @PR34711(
3482; SINK-AFTER-NEXT:  entry:
3483; SINK-AFTER-NEXT:    [[C1:%.*]] = bitcast i32* [[C:%.*]] to i8*
3484; SINK-AFTER-NEXT:    [[B3:%.*]] = bitcast i32* [[B:%.*]] to i8*
3485; SINK-AFTER-NEXT:    [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
3486; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
3487; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
3488; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
3489; SINK-AFTER:       vector.memcheck:
3490; SINK-AFTER-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[C]], i64 [[N]]
3491; SINK-AFTER-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
3492; SINK-AFTER-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
3493; SINK-AFTER-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
3494; SINK-AFTER-NEXT:    [[SCEVGEP6:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 0, i64 1
3495; SINK-AFTER-NEXT:    [[SCEVGEP67:%.*]] = bitcast i16* [[SCEVGEP6]] to i8*
3496; SINK-AFTER-NEXT:    [[SCEVGEP8:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 [[N]], i64 0
3497; SINK-AFTER-NEXT:    [[SCEVGEP89:%.*]] = bitcast i16* [[SCEVGEP8]] to i8*
3498; SINK-AFTER-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP45]]
3499; SINK-AFTER-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP2]]
3500; SINK-AFTER-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
3501; SINK-AFTER-NEXT:    [[BOUND010:%.*]] = icmp ult i8* [[C1]], [[SCEVGEP89]]
3502; SINK-AFTER-NEXT:    [[BOUND111:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP2]]
3503; SINK-AFTER-NEXT:    [[FOUND_CONFLICT12:%.*]] = and i1 [[BOUND010]], [[BOUND111]]
3504; SINK-AFTER-NEXT:    [[CONFLICT_RDX:%.*]] = or i1 [[FOUND_CONFLICT]], [[FOUND_CONFLICT12]]
3505; SINK-AFTER-NEXT:    [[BOUND013:%.*]] = icmp ult i8* [[B3]], [[SCEVGEP89]]
3506; SINK-AFTER-NEXT:    [[BOUND114:%.*]] = icmp ult i8* [[SCEVGEP67]], [[SCEVGEP45]]
3507; SINK-AFTER-NEXT:    [[FOUND_CONFLICT15:%.*]] = and i1 [[BOUND013]], [[BOUND114]]
3508; SINK-AFTER-NEXT:    [[CONFLICT_RDX16:%.*]] = or i1 [[CONFLICT_RDX]], [[FOUND_CONFLICT15]]
3509; SINK-AFTER-NEXT:    br i1 [[CONFLICT_RDX16]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3510; SINK-AFTER:       vector.ph:
3511; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
3512; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3513; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
3514; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
3515; SINK-AFTER:       vector.body:
3516; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3517; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
3518; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
3519; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
3520; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
3521; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
3522; SINK-AFTER-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP0]]
3523; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1
3524; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1
3525; SINK-AFTER-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1
3526; SINK-AFTER-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1
3527; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
3528; SINK-AFTER-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
3529; SINK-AFTER-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP10]], align 4
3530; SINK-AFTER-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP5]], align 2
3531; SINK-AFTER-NEXT:    [[TMP12:%.*]] = load i16, i16* [[TMP6]], align 2
3532; SINK-AFTER-NEXT:    [[TMP13:%.*]] = load i16, i16* [[TMP7]], align 2
3533; SINK-AFTER-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP8]], align 2
3534; SINK-AFTER-NEXT:    [[TMP15:%.*]] = insertelement <4 x i16> poison, i16 [[TMP11]], i32 0
3535; SINK-AFTER-NEXT:    [[TMP16:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP12]], i32 1
3536; SINK-AFTER-NEXT:    [[TMP17:%.*]] = insertelement <4 x i16> [[TMP16]], i16 [[TMP13]], i32 2
3537; SINK-AFTER-NEXT:    [[TMP18]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP14]], i32 3
3538; SINK-AFTER-NEXT:    [[TMP19:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP18]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3539; SINK-AFTER-NEXT:    [[TMP20:%.*]] = sext <4 x i16> [[TMP19]] to <4 x i32>
3540; SINK-AFTER-NEXT:    [[TMP21:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32>
3541; SINK-AFTER-NEXT:    [[TMP22:%.*]] = mul nsw <4 x i32> [[TMP21]], [[TMP20]]
3542; SINK-AFTER-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]]
3543; SINK-AFTER-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0
3544; SINK-AFTER-NEXT:    [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>*
3545; SINK-AFTER-NEXT:    store <4 x i32> [[TMP22]], <4 x i32>* [[TMP25]], align 4
3546; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
3547; SINK-AFTER-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3548; SINK-AFTER-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP41:![0-9]+]]
3549; SINK-AFTER:       middle.block:
3550; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3551; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP18]], i32 3
3552; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP18]], i32 2
3553; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3554; SINK-AFTER:       scalar.ph:
3555; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3556; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
3557; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
3558; SINK-AFTER:       for.body:
3559; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ]
3560; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3561; SINK-AFTER-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
3562; SINK-AFTER-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
3563; SINK-AFTER-NEXT:    store i32 7, i32* [[ARRAYCIDX]], align 4
3564; SINK-AFTER-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3565; SINK-AFTER-NEXT:    [[TMP27]] = load i16, i16* [[CUR_INDEX]], align 2
3566; SINK-AFTER-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP27]] to i32
3567; SINK-AFTER-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
3568; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3569; SINK-AFTER-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3570; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3571; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3572; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP42:![0-9]+]]
3573; SINK-AFTER:       for.end:
3574; SINK-AFTER-NEXT:    ret void
3575;
3576entry:
3577  %pre.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 0, i64 0
3578  %.pre = load i16, i16* %pre.index
3579  br label %for.body
3580
3581for.body:
3582  %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
3583  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
3584  %arraycidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
3585  %cur.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 %indvars.iv, i64 1
3586  store i32 7, i32* %arraycidx   ; 1st instruction, to be widened.
3587  %conv = sext i16 %0 to i32     ; 2nd, cast to sink after third.
3588  %1 = load i16, i16* %cur.index ; 3rd, first-order-recurring load not widened.
3589  %conv3 = sext i16 %1 to i32
3590  %mul = mul nsw i32 %conv3, %conv
3591  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
3592  store i32 %mul, i32* %arrayidx5
3593  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
3594  %exitcond = icmp eq i64 %indvars.iv.next, %n
3595  br i1 %exitcond, label %for.end, label %for.body
3596
3597for.end:
3598  ret void
3599}
3600
3601; void sink_after_with_multiple_users(short *a, int n, int *b) {
3602;   for(int i = 0; i < n; i++)
3603;     b[i] = ((a[i] + 2) * a[i + 1]);
3604; }
3605;
3606
3607;
3608define void @sink_after_with_multiple_users(i16* %a, i32* %b, i64 %n) {
3609; CHECK-LABEL: @sink_after_with_multiple_users(
3610; CHECK-NEXT:  entry:
3611; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
3612; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
3613; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
3614; CHECK:       vector.memcheck:
3615; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]]
3616; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
3617; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
3618; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
3619; CHECK-NEXT:    [[TMP1:%.*]] = bitcast i16* [[SCEVGEP5]] to i32*
3620; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ugt i32* [[TMP1]], [[B]]
3621; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[SCEVGEP]] to i16*
3622; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP3]], [[TMP2]]
3623; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
3624; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3625; CHECK:       vector.ph:
3626; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -4
3627; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
3628; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
3629; CHECK:       vector.body:
3630; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3631; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
3632; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 1
3633; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
3634; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
3635; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2
3636; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3637; CHECK-NEXT:    [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
3638; CHECK-NEXT:    [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], <i32 2, i32 2, i32 2, i32 2>
3639; CHECK-NEXT:    [[TMP9:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
3640; CHECK-NEXT:    [[TMP10:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP9]]
3641; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
3642; CHECK-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
3643; CHECK-NEXT:    store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
3644; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
3645; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3646; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
3647; CHECK:       middle.block:
3648; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
3649; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
3650; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3651; CHECK:       scalar.ph:
3652; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3653; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
3654; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
3655; CHECK:       for.body:
3656; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ]
3657; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3658; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3659; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 2
3660; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3661; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
3662; CHECK-NEXT:    [[TMP14]] = load i16, i16* [[ARRAYIDX2]], align 2
3663; CHECK-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP14]] to i32
3664; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
3665; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3666; CHECK-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3667; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3668; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
3669; CHECK:       for.end:
3670; CHECK-NEXT:    ret void
3671;
3672; UNROLL-LABEL: @sink_after_with_multiple_users(
3673; UNROLL-NEXT:  entry:
3674; UNROLL-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
3675; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
3676; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
3677; UNROLL:       vector.memcheck:
3678; UNROLL-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B:%.*]], i64 [[N]]
3679; UNROLL-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
3680; UNROLL-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
3681; UNROLL-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
3682; UNROLL-NEXT:    [[TMP1:%.*]] = bitcast i16* [[SCEVGEP5]] to i32*
3683; UNROLL-NEXT:    [[BOUND0:%.*]] = icmp ugt i32* [[TMP1]], [[B]]
3684; UNROLL-NEXT:    [[TMP2:%.*]] = bitcast i32* [[SCEVGEP]] to i16*
3685; UNROLL-NEXT:    [[BOUND1:%.*]] = icmp ult i16* [[SCEVGEP3]], [[TMP2]]
3686; UNROLL-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
3687; UNROLL-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3688; UNROLL:       vector.ph:
3689; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -8
3690; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
3691; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
3692; UNROLL:       vector.body:
3693; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3694; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ]
3695; UNROLL-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 1
3696; UNROLL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
3697; UNROLL-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
3698; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2
3699; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i64 4
3700; UNROLL-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>*
3701; UNROLL-NEXT:    [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2
3702; UNROLL-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3703; UNROLL-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3704; UNROLL-NEXT:    [[TMP10:%.*]] = sext <4 x i16> [[TMP8]] to <4 x i32>
3705; UNROLL-NEXT:    [[TMP11:%.*]] = sext <4 x i16> [[TMP9]] to <4 x i32>
3706; UNROLL-NEXT:    [[TMP12:%.*]] = add nsw <4 x i32> [[TMP10]], <i32 2, i32 2, i32 2, i32 2>
3707; UNROLL-NEXT:    [[TMP13:%.*]] = add nsw <4 x i32> [[TMP11]], <i32 2, i32 2, i32 2, i32 2>
3708; UNROLL-NEXT:    [[TMP14:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
3709; UNROLL-NEXT:    [[TMP15:%.*]] = sext <4 x i16> [[WIDE_LOAD7]] to <4 x i32>
3710; UNROLL-NEXT:    [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP12]], [[TMP14]]
3711; UNROLL-NEXT:    [[TMP17:%.*]] = mul nsw <4 x i32> [[TMP13]], [[TMP15]]
3712; UNROLL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDEX]]
3713; UNROLL-NEXT:    [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
3714; UNROLL-NEXT:    store <4 x i32> [[TMP16]], <4 x i32>* [[TMP19]], align 4
3715; UNROLL-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP18]], i64 4
3716; UNROLL-NEXT:    [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>*
3717; UNROLL-NEXT:    store <4 x i32> [[TMP17]], <4 x i32>* [[TMP21]], align 4
3718; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
3719; UNROLL-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3720; UNROLL-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
3721; UNROLL:       middle.block:
3722; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
3723; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i64 3
3724; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3725; UNROLL:       scalar.ph:
3726; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3727; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[VECTOR_MEMCHECK]] ], [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
3728; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
3729; UNROLL:       for.body:
3730; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[FOR_BODY]] ]
3731; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3732; UNROLL-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3733; UNROLL-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 2
3734; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3735; UNROLL-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
3736; UNROLL-NEXT:    [[TMP23]] = load i16, i16* [[ARRAYIDX2]], align 2
3737; UNROLL-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP23]] to i32
3738; UNROLL-NEXT:    [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
3739; UNROLL-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3740; UNROLL-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3741; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3742; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
3743; UNROLL:       for.end:
3744; UNROLL-NEXT:    ret void
3745;
3746; UNROLL-NO-IC-LABEL: @sink_after_with_multiple_users(
3747; UNROLL-NO-IC-NEXT:  entry:
3748; UNROLL-NO-IC-NEXT:    [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
3749; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
3750; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
3751; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
3752; UNROLL-NO-IC:       vector.memcheck:
3753; UNROLL-NO-IC-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
3754; UNROLL-NO-IC-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
3755; UNROLL-NO-IC-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
3756; UNROLL-NO-IC-NEXT:    [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
3757; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
3758; UNROLL-NO-IC-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
3759; UNROLL-NO-IC-NEXT:    [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
3760; UNROLL-NO-IC-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
3761; UNROLL-NO-IC-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
3762; UNROLL-NO-IC-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
3763; UNROLL-NO-IC-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3764; UNROLL-NO-IC:       vector.ph:
3765; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
3766; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3767; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
3768; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
3769; UNROLL-NO-IC:       vector.body:
3770; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3771; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD7:%.*]], [[VECTOR_BODY]] ]
3772; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
3773; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 4
3774; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1
3775; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[TMP2]], 1
3776; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
3777; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]]
3778; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
3779; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = bitcast i16* [[TMP7]] to <4 x i16>*
3780; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP8]], align 2
3781; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 4
3782; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = bitcast i16* [[TMP9]] to <4 x i16>*
3783; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD7]] = load <4 x i16>, <4 x i16>* [[TMP10]], align 2
3784; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3785; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3786; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32>
3787; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = sext <4 x i16> [[TMP12]] to <4 x i32>
3788; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 2, i32 2, i32 2, i32 2>
3789; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = add nsw <4 x i32> [[TMP14]], <i32 2, i32 2, i32 2, i32 2>
3790; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
3791; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = sext <4 x i16> [[WIDE_LOAD7]] to <4 x i32>
3792; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP17]]
3793; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = mul nsw <4 x i32> [[TMP16]], [[TMP18]]
3794; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
3795; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]]
3796; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 0
3797; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = bitcast i32* [[TMP23]] to <4 x i32>*
3798; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP19]], <4 x i32>* [[TMP24]], align 4
3799; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 4
3800; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = bitcast i32* [[TMP25]] to <4 x i32>*
3801; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP20]], <4 x i32>* [[TMP26]], align 4
3802; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
3803; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3804; UNROLL-NO-IC-NEXT:    br i1 [[TMP27]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
3805; UNROLL-NO-IC:       middle.block:
3806; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3807; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 3
3808; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD7]], i32 2
3809; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3810; UNROLL-NO-IC:       scalar.ph:
3811; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3812; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
3813; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
3814; UNROLL-NO-IC:       for.body:
3815; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP28:%.*]], [[FOR_BODY]] ]
3816; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3817; UNROLL-NO-IC-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3818; UNROLL-NO-IC-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 2
3819; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3820; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
3821; UNROLL-NO-IC-NEXT:    [[TMP28]] = load i16, i16* [[ARRAYIDX2]], align 2
3822; UNROLL-NO-IC-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP28]] to i32
3823; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
3824; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3825; UNROLL-NO-IC-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3826; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3827; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
3828; UNROLL-NO-IC:       for.end:
3829; UNROLL-NO-IC-NEXT:    ret void
3830;
3831; UNROLL-NO-VF-LABEL: @sink_after_with_multiple_users(
3832; UNROLL-NO-VF-NEXT:  entry:
3833; UNROLL-NO-VF-NEXT:    [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
3834; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
3835; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
3836; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
3837; UNROLL-NO-VF:       vector.memcheck:
3838; UNROLL-NO-VF-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
3839; UNROLL-NO-VF-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
3840; UNROLL-NO-VF-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
3841; UNROLL-NO-VF-NEXT:    [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
3842; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
3843; UNROLL-NO-VF-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
3844; UNROLL-NO-VF-NEXT:    [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
3845; UNROLL-NO-VF-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
3846; UNROLL-NO-VF-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
3847; UNROLL-NO-VF-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
3848; UNROLL-NO-VF-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3849; UNROLL-NO-VF:       vector.ph:
3850; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
3851; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3852; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
3853; UNROLL-NO-VF:       vector.body:
3854; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3855; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
3856; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
3857; UNROLL-NO-VF-NEXT:    [[INDUCTION7:%.*]] = add i64 [[INDEX]], 1
3858; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[INDUCTION]], 1
3859; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[INDUCTION7]], 1
3860; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]]
3861; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
3862; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = load i16, i16* [[TMP3]], align 2
3863; UNROLL-NO-VF-NEXT:    [[TMP6]] = load i16, i16* [[TMP4]], align 2
3864; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
3865; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sext i16 [[TMP5]] to i32
3866; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = add nsw i32 [[TMP7]], 2
3867; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = add nsw i32 [[TMP8]], 2
3868; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP5]] to i32
3869; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = sext i16 [[TMP6]] to i32
3870; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = mul nsw i32 [[TMP9]], [[TMP11]]
3871; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = mul nsw i32 [[TMP10]], [[TMP12]]
3872; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION]]
3873; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION7]]
3874; UNROLL-NO-VF-NEXT:    store i32 [[TMP13]], i32* [[TMP15]], align 4
3875; UNROLL-NO-VF-NEXT:    store i32 [[TMP14]], i32* [[TMP16]], align 4
3876; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
3877; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3878; UNROLL-NO-VF-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP47:![0-9]+]]
3879; UNROLL-NO-VF:       middle.block:
3880; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3881; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3882; UNROLL-NO-VF:       scalar.ph:
3883; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
3884; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
3885; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
3886; UNROLL-NO-VF:       for.body:
3887; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ]
3888; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3889; UNROLL-NO-VF-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3890; UNROLL-NO-VF-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 2
3891; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3892; UNROLL-NO-VF-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
3893; UNROLL-NO-VF-NEXT:    [[TMP18]] = load i16, i16* [[ARRAYIDX2]], align 2
3894; UNROLL-NO-VF-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP18]] to i32
3895; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
3896; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3897; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3898; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3899; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
3900; UNROLL-NO-VF:       for.end:
3901; UNROLL-NO-VF-NEXT:    ret void
3902;
3903; SINK-AFTER-LABEL: @sink_after_with_multiple_users(
3904; SINK-AFTER-NEXT:  entry:
3905; SINK-AFTER-NEXT:    [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
3906; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
3907; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
3908; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_MEMCHECK:%.*]]
3909; SINK-AFTER:       vector.memcheck:
3910; SINK-AFTER-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
3911; SINK-AFTER-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
3912; SINK-AFTER-NEXT:    [[SCEVGEP3:%.*]] = getelementptr i16, i16* [[A]], i64 1
3913; SINK-AFTER-NEXT:    [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
3914; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[N]], 1
3915; SINK-AFTER-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i16, i16* [[A]], i64 [[TMP0]]
3916; SINK-AFTER-NEXT:    [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
3917; SINK-AFTER-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
3918; SINK-AFTER-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
3919; SINK-AFTER-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
3920; SINK-AFTER-NEXT:    br i1 [[FOUND_CONFLICT]], label [[SCALAR_PH]], label [[VECTOR_PH:%.*]]
3921; SINK-AFTER:       vector.ph:
3922; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
3923; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3924; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
3925; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
3926; SINK-AFTER:       vector.body:
3927; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3928; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
3929; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 0
3930; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
3931; SINK-AFTER-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
3932; SINK-AFTER-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 0
3933; SINK-AFTER-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
3934; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2
3935; SINK-AFTER-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3936; SINK-AFTER-NEXT:    [[TMP7:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
3937; SINK-AFTER-NEXT:    [[TMP8:%.*]] = add nsw <4 x i32> [[TMP7]], <i32 2, i32 2, i32 2, i32 2>
3938; SINK-AFTER-NEXT:    [[TMP9:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
3939; SINK-AFTER-NEXT:    [[TMP10:%.*]] = mul nsw <4 x i32> [[TMP8]], [[TMP9]]
3940; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
3941; SINK-AFTER-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[TMP11]], i32 0
3942; SINK-AFTER-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
3943; SINK-AFTER-NEXT:    store <4 x i32> [[TMP10]], <4 x i32>* [[TMP13]], align 4
3944; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
3945; SINK-AFTER-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3946; SINK-AFTER-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP48:![0-9]+]]
3947; SINK-AFTER:       middle.block:
3948; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3949; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
3950; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
3951; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3952; SINK-AFTER:       scalar.ph:
3953; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_MEMCHECK]] ], [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3954; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ], [ 0, [[VECTOR_MEMCHECK]] ]
3955; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
3956; SINK-AFTER:       for.body:
3957; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ]
3958; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3959; SINK-AFTER-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3960; SINK-AFTER-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 2
3961; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3962; SINK-AFTER-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
3963; SINK-AFTER-NEXT:    [[TMP15]] = load i16, i16* [[ARRAYIDX2]], align 2
3964; SINK-AFTER-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP15]] to i32
3965; SINK-AFTER-NEXT:    [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
3966; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3967; SINK-AFTER-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3968; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3969; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
3970; SINK-AFTER:       for.end:
3971; SINK-AFTER-NEXT:    ret void
3972;
3973entry:
3974  %.pre = load i16, i16* %a
3975  br label %for.body
3976
3977for.body:
3978  %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
3979  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
3980  %conv = sext i16 %0 to i32
3981  %add = add nsw i32 %conv, 2
3982  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
3983  %arrayidx2 = getelementptr inbounds i16, i16* %a, i64 %indvars.iv.next
3984  %1 = load i16, i16* %arrayidx2
3985  %conv3 = sext i16 %1 to i32
3986  %mul = mul nsw i32 %add, %conv3
3987  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
3988  store i32 %mul, i32* %arrayidx5
3989  %exitcond = icmp eq i64 %indvars.iv.next, %n
3990  br i1 %exitcond, label %for.end, label %for.body
3991
3992for.end:
3993  ret void
3994}
3995
3996; Do not sink branches: While branches are if-converted and do not require
3997; sinking, instructions with side effects (e.g. loads) conditioned by those
3998; branches will become users of the condition bit after vectorization and would
3999; need to be sunk if the loop is vectorized.
4000define void @do_not_sink_branch(i32 %x, i32* %in, i32* %out, i32 %tc) local_unnamed_addr #0 {
4001; CHECK-LABEL: @do_not_sink_branch(
4002; CHECK-NEXT:  entry:
4003; CHECK-NEXT:    [[CMP530:%.*]] = icmp sgt i32 [[TC:%.*]], 0
4004; CHECK-NEXT:    br label [[FOR_BODY4:%.*]]
4005; CHECK:       for.body4:
4006; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
4007; CHECK-NEXT:    [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
4008; CHECK-NEXT:    br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
4009; CHECK:       cond.true:
4010; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[INDVARS_IV]] to i64
4011; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP0]]
4012; CHECK-NEXT:    [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
4013; CHECK-NEXT:    br label [[COND_END]]
4014; CHECK:       cond.end:
4015; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
4016; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[INDVARS_IV]] to i64
4017; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP1]]
4018; CHECK-NEXT:    store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
4019; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4020; CHECK-NEXT:    [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
4021; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
4022; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
4023; CHECK:       for.end12.loopexit:
4024; CHECK-NEXT:    ret void
4025;
4026; UNROLL-LABEL: @do_not_sink_branch(
4027; UNROLL-NEXT:  entry:
4028; UNROLL-NEXT:    [[CMP530:%.*]] = icmp sgt i32 [[TC:%.*]], 0
4029; UNROLL-NEXT:    br label [[FOR_BODY4:%.*]]
4030; UNROLL:       for.body4:
4031; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
4032; UNROLL-NEXT:    [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
4033; UNROLL-NEXT:    br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
4034; UNROLL:       cond.true:
4035; UNROLL-NEXT:    [[TMP0:%.*]] = zext i32 [[INDVARS_IV]] to i64
4036; UNROLL-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP0]]
4037; UNROLL-NEXT:    [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
4038; UNROLL-NEXT:    br label [[COND_END]]
4039; UNROLL:       cond.end:
4040; UNROLL-NEXT:    [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
4041; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[INDVARS_IV]] to i64
4042; UNROLL-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP1]]
4043; UNROLL-NEXT:    store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
4044; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4045; UNROLL-NEXT:    [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
4046; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
4047; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
4048; UNROLL:       for.end12.loopexit:
4049; UNROLL-NEXT:    ret void
4050;
4051; UNROLL-NO-IC-LABEL: @do_not_sink_branch(
4052; UNROLL-NO-IC-NEXT:  entry:
4053; UNROLL-NO-IC-NEXT:    [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
4054; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY4:%.*]]
4055; UNROLL-NO-IC:       for.body4:
4056; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
4057; UNROLL-NO-IC-NEXT:    [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
4058; UNROLL-NO-IC-NEXT:    br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
4059; UNROLL-NO-IC:       cond.true:
4060; UNROLL-NO-IC-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i32 [[INDVARS_IV]]
4061; UNROLL-NO-IC-NEXT:    [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
4062; UNROLL-NO-IC-NEXT:    br label [[COND_END]]
4063; UNROLL-NO-IC:       cond.end:
4064; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
4065; UNROLL-NO-IC-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i32 [[INDVARS_IV]]
4066; UNROLL-NO-IC-NEXT:    store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
4067; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4068; UNROLL-NO-IC-NEXT:    [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
4069; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
4070; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
4071; UNROLL-NO-IC:       for.end12.loopexit:
4072; UNROLL-NO-IC-NEXT:    ret void
4073;
4074; UNROLL-NO-VF-LABEL: @do_not_sink_branch(
4075; UNROLL-NO-VF-NEXT:  entry:
4076; UNROLL-NO-VF-NEXT:    [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
4077; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY4:%.*]]
4078; UNROLL-NO-VF:       for.body4:
4079; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
4080; UNROLL-NO-VF-NEXT:    [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
4081; UNROLL-NO-VF-NEXT:    br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
4082; UNROLL-NO-VF:       cond.true:
4083; UNROLL-NO-VF-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i32 [[INDVARS_IV]]
4084; UNROLL-NO-VF-NEXT:    [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
4085; UNROLL-NO-VF-NEXT:    br label [[COND_END]]
4086; UNROLL-NO-VF:       cond.end:
4087; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
4088; UNROLL-NO-VF-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i32 [[INDVARS_IV]]
4089; UNROLL-NO-VF-NEXT:    store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
4090; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4091; UNROLL-NO-VF-NEXT:    [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
4092; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
4093; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
4094; UNROLL-NO-VF:       for.end12.loopexit:
4095; UNROLL-NO-VF-NEXT:    ret void
4096;
4097; SINK-AFTER-LABEL: @do_not_sink_branch(
4098; SINK-AFTER-NEXT:  entry:
4099; SINK-AFTER-NEXT:    [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
4100; SINK-AFTER-NEXT:    br label [[FOR_BODY4:%.*]]
4101; SINK-AFTER:       for.body4:
4102; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
4103; SINK-AFTER-NEXT:    [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
4104; SINK-AFTER-NEXT:    br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
4105; SINK-AFTER:       cond.true:
4106; SINK-AFTER-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i32 [[INDVARS_IV]]
4107; SINK-AFTER-NEXT:    [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
4108; SINK-AFTER-NEXT:    br label [[COND_END]]
4109; SINK-AFTER:       cond.end:
4110; SINK-AFTER-NEXT:    [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
4111; SINK-AFTER-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i32 [[INDVARS_IV]]
4112; SINK-AFTER-NEXT:    store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
4113; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
4114; SINK-AFTER-NEXT:    [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
4115; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
4116; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
4117; SINK-AFTER:       for.end12.loopexit:
4118; SINK-AFTER-NEXT:    ret void
4119;
; Input IR for this test. Under every check prefix used in this function the
; expected output keeps the original scalar blocks (for.body4, cond.true,
; cond.end) and contains no vector.body, i.e. the loop is left unvectorized.
4120entry:
  ; Branch condition for the first iteration: 0 < %tc.
4121  %cmp530 = icmp slt i32 0, %tc
4122  br label %for.body4
4123
4124for.body4:                                        ; preds = %cond.end, %entry
4125  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %cond.end ]
  ; %cmp534 carries the compare from the previous iteration (%cmp5, defined at
  ; the bottom of cond.end); on loop entry it takes %cmp530 instead.
4126  %cmp534 = phi i1 [ %cmp530, %entry ], [ %cmp5, %cond.end ]
  ; The only user of the carried compare is this conditional branch.
4127  br i1 %cmp534, label %cond.true, label %cond.end
4128
4129cond.true:                                        ; preds = %for.body4
  ; The load from %in executes only when the carried condition is true.
4130  %arrayidx7 = getelementptr inbounds i32, i32* %in, i32 %indvars.iv
4131  %in.val = load i32, i32* %arrayidx7, align 4
4132  br label %cond.end
4133
4134cond.end:                                         ; preds = %for.body4, %cond.true
  ; Store the loaded value when the load ran, otherwise store 0.
4135  %cond = phi i32 [ %in.val, %cond.true ], [ 0, %for.body4 ]
4136  %arrayidx8 = getelementptr inbounds i32, i32* %out, i32 %indvars.iv
4137  store i32 %cond, i32* %arrayidx8, align 4
4138  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
  ; %cmp5 feeds %cmp534 on the next iteration; the loop exit is decided
  ; separately by comparing the incremented induction variable against %x.
4139  %cmp5 = icmp slt i32 %indvars.iv.next, %tc
4140  %exitcond = icmp eq i32 %indvars.iv.next, %x
4141  br i1 %exitcond, label %for.end12.loopexit, label %for.body4
4142
4143for.end12.loopexit:                               ; preds = %cond.end
4144  ret void
4145}
4146
4147; Dead instructions, like the exit condition, are not part of the actual VPlan
4148; and do not need to be sunk. PR44634.
4149define void @sink_dead_inst() {
4150;
4151; CHECK-LABEL: @sink_dead_inst(
4152; CHECK-NEXT:  entry:
4153; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4154; CHECK:       vector.ph:
4155; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
4156; CHECK:       vector.body:
4157; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4158; CHECK-NEXT:    [[TMP0:%.*]] = phi i16 [ -24, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
4159; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
4160; CHECK-NEXT:    [[TMP1]] = add i16 [[TMP0]], 4
4161; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
4162; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
4163; CHECK:       middle.block:
4164; CHECK-NEXT:    [[TMP3:%.*]] = or i16 [[TMP0]], 1
4165; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT2:%.*]] = zext i16 [[TMP3]] to i32
4166; CHECK-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
4167; CHECK:       scalar.ph:
4168; CHECK-NEXT:    [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
4169; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ -27, [[ENTRY]] ], [ 13, [[MIDDLE_BLOCK]] ]
4170; CHECK-NEXT:    br label [[FOR_COND:%.*]]
4171; CHECK:       for.cond:
4172; CHECK-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
4173; CHECK-NEXT:    [[SCALAR_RECUR5:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
4174; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR5]], 15
4175; CHECK-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
4176; CHECK-NEXT:    [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
4177; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
4178; CHECK:       for.end:
4179; CHECK-NEXT:    ret void
4180;
4181; UNROLL-LABEL: @sink_dead_inst(
4182; UNROLL-NEXT:  entry:
4183; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4184; UNROLL:       vector.ph:
4185; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
4186; UNROLL:       vector.body:
4187; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4188; UNROLL-NEXT:    [[TMP0:%.*]] = phi i16 [ -24, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
4189; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
4190; UNROLL-NEXT:    [[TMP1]] = add i16 [[TMP0]], 8
4191; UNROLL-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
4192; UNROLL-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
4193; UNROLL:       middle.block:
4194; UNROLL-NEXT:    [[TMP3:%.*]] = or i16 [[TMP0]], 5
4195; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT3:%.*]] = zext i16 [[TMP3]] to i32
4196; UNROLL-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
4197; UNROLL:       scalar.ph:
4198; UNROLL-NEXT:    [[SCALAR_RECUR_INIT5:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT3]], [[MIDDLE_BLOCK]] ]
4199; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ -27, [[ENTRY]] ], [ 13, [[MIDDLE_BLOCK]] ]
4200; UNROLL-NEXT:    br label [[FOR_COND:%.*]]
4201; UNROLL:       for.cond:
4202; UNROLL-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
4203; UNROLL-NEXT:    [[SCALAR_RECUR6:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT5]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
4204; UNROLL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR6]], 15
4205; UNROLL-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
4206; UNROLL-NEXT:    [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
4207; UNROLL-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
4208; UNROLL:       for.end:
4209; UNROLL-NEXT:    ret void
4210;
4211; UNROLL-NO-IC-LABEL: @sink_dead_inst(
4212; UNROLL-NO-IC-NEXT:  entry:
4213; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4214; UNROLL-NO-IC:       vector.ph:
4215; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
4216; UNROLL-NO-IC:       vector.body:
4217; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4218; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4219; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
4220; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR2:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
4221; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
4222; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
4223; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[STEP_ADD]], <i16 1, i16 1, i16 1, i16 1>
4224; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
4225; UNROLL-NO-IC-NEXT:    [[TMP3]] = zext <4 x i16> [[TMP1]] to <4 x i32>
4226; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR2]], <4 x i32> [[TMP2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4227; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4228; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add <4 x i16> [[TMP0]], <i16 5, i16 5, i16 5, i16 5>
4229; UNROLL-NO-IC-NEXT:    [[TMP7]] = add <4 x i16> [[TMP1]], <i16 5, i16 5, i16 5, i16 5>
4230; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP6]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4231; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4232; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
4233; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
4234; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
4235; UNROLL-NO-IC-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
4236; UNROLL-NO-IC:       middle.block:
4237; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 43, 40
4238; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP7]], i32 3
4239; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP7]], i32 2
4240; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
4241; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
4242; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
4243; UNROLL-NO-IC:       scalar.ph:
4244; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT5:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT3]], [[MIDDLE_BLOCK]] ]
4245; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
4246; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
4247; UNROLL-NO-IC-NEXT:    br label [[FOR_COND:%.*]]
4248; UNROLL-NO-IC:       for.cond:
4249; UNROLL-NO-IC-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
4250; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
4251; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR6:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT5]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
4252; UNROLL-NO-IC-NEXT:    [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10
4253; UNROLL-NO-IC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR6]], 15
4254; UNROLL-NO-IC-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
4255; UNROLL-NO-IC-NEXT:    [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
4256; UNROLL-NO-IC-NEXT:    [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
4257; UNROLL-NO-IC-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
4258; UNROLL-NO-IC:       for.end:
4259; UNROLL-NO-IC-NEXT:    ret void
4260;
4261; UNROLL-NO-VF-LABEL: @sink_dead_inst(
4262; UNROLL-NO-VF-NEXT:  entry:
4263; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4264; UNROLL-NO-VF:       vector.ph:
4265; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
4266; UNROLL-NO-VF:       vector.body:
4267; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4268; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
4269; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR2:%.*]] = phi i32 [ -27, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
4270; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16
4271; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = add i16 -27, [[TMP0]]
4272; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i16 [[OFFSET_IDX]], 0
4273; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i16 [[OFFSET_IDX]], 1
4274; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i16 [[INDUCTION]], 1
4275; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add i16 [[INDUCTION1]], 1
4276; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = zext i16 [[TMP1]] to i32
4277; UNROLL-NO-VF-NEXT:    [[TMP4]] = zext i16 [[TMP2]] to i32
4278; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = add i16 [[TMP1]], 5
4279; UNROLL-NO-VF-NEXT:    [[TMP6]] = add i16 [[TMP2]], 5
4280; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
4281; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = icmp eq i32 [[INDEX_NEXT]], 42
4282; UNROLL-NO-VF-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP49:![0-9]+]]
4283; UNROLL-NO-VF:       middle.block:
4284; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 43, 42
4285; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
4286; UNROLL-NO-VF:       scalar.ph:
4287; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
4288; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
4289; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 15, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
4290; UNROLL-NO-VF-NEXT:    br label [[FOR_COND:%.*]]
4291; UNROLL-NO-VF:       for.cond:
4292; UNROLL-NO-VF-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
4293; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
4294; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
4295; UNROLL-NO-VF-NEXT:    [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10
4296; UNROLL-NO-VF-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR4]], 15
4297; UNROLL-NO-VF-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
4298; UNROLL-NO-VF-NEXT:    [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
4299; UNROLL-NO-VF-NEXT:    [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
4300; UNROLL-NO-VF-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]]
4301; UNROLL-NO-VF:       for.end:
4302; UNROLL-NO-VF-NEXT:    ret void
4303;
4304; SINK-AFTER-LABEL: @sink_dead_inst(
4305; SINK-AFTER-NEXT:  entry:
4306; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4307; SINK-AFTER:       vector.ph:
4308; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
4309; SINK-AFTER:       vector.body:
4310; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
4311; SINK-AFTER-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
4312; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
4313; SINK-AFTER-NEXT:    [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
4314; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
4315; SINK-AFTER-NEXT:    [[TMP1]] = zext <4 x i16> [[TMP0]] to <4 x i32>
4316; SINK-AFTER-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4317; SINK-AFTER-NEXT:    [[TMP3]] = add <4 x i16> [[TMP0]], <i16 5, i16 5, i16 5, i16 5>
4318; SINK-AFTER-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4319; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
4320; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
4321; SINK-AFTER-NEXT:    [[TMP6:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
4322; SINK-AFTER-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP50:![0-9]+]]
4323; SINK-AFTER:       middle.block:
4324; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i32 43, 40
4325; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
4326; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
4327; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
4328; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
4329; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
4330; SINK-AFTER:       scalar.ph:
4331; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
4332; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
4333; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
4334; SINK-AFTER-NEXT:    br label [[FOR_COND:%.*]]
4335; SINK-AFTER:       for.cond:
4336; SINK-AFTER-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
4337; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
4338; SINK-AFTER-NEXT:    [[SCALAR_RECUR5:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
4339; SINK-AFTER-NEXT:    [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10
4340; SINK-AFTER-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR5]], 15
4341; SINK-AFTER-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
4342; SINK-AFTER-NEXT:    [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
4343; SINK-AFTER-NEXT:    [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
4344; SINK-AFTER-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]]
4345; SINK-AFTER:       for.end:
4346; SINK-AFTER-NEXT:    ret void
4347;
4348entry:
4349  br label %for.cond
4350
4351for.cond:
4352  %iv = phi i16 [ -27, %entry ], [ %iv.next, %for.cond ]
4353  %rec.1 = phi i16 [ 0, %entry ], [ %rec.1.prev, %for.cond ]
4354  %rec.2 = phi i32 [ -27, %entry ], [ %rec.2.prev, %for.cond ]
4355  %use.rec.1 = sub i16 %rec.1, 10
4356  %cmp = icmp eq i32 %rec.2, 15
4357  %iv.next = add i16 %iv, 1
4358  %rec.2.prev = zext i16 %iv.next to i32
4359  %rec.1.prev = add i16 %iv.next, 5
4360  br i1 %cmp, label %for.end, label %for.cond
4361
4362for.end:
4363  ret void
4364}
4365
4366define i32 @sink_into_replication_region(i32 %y) {
4367;
4368; CHECK-LABEL: @sink_into_replication_region(
4369; CHECK-NEXT:  bb:
4370; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
4371; CHECK-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
4372; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
4373; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4374; CHECK:       vector.ph:
4375; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
4376; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4
4377; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1
4378; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
4379; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4380; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
4381; CHECK:       vector.body:
4382; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ]
4383; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[PRED_UDIV_CONTINUE8]] ]
4384; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[PRED_UDIV_CONTINUE8]] ]
4385; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4386; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
4387; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
4388; CHECK-NEXT:    [[VEC_IV:%.*]] = or <4 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3>
4389; CHECK-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
4390; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
4391; CHECK-NEXT:    br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4392; CHECK:       pred.udiv.if:
4393; CHECK-NEXT:    [[TMP4:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
4394; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
4395; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4396; CHECK:       pred.udiv.continue:
4397; CHECK-NEXT:    [[TMP6:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ]
4398; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
4399; CHECK-NEXT:    br i1 [[TMP7]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
4400; CHECK:       pred.udiv.if3:
4401; CHECK-NEXT:    [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -1
4402; CHECK-NEXT:    [[TMP9:%.*]] = udiv i32 219220132, [[TMP8]]
4403; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP9]], i64 1
4404; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE4]]
4405; CHECK:       pred.udiv.continue4:
4406; CHECK-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF3]] ]
4407; CHECK-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2
4408; CHECK-NEXT:    br i1 [[TMP12]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
4409; CHECK:       pred.udiv.if5:
4410; CHECK-NEXT:    [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], -2
4411; CHECK-NEXT:    [[TMP14:%.*]] = udiv i32 219220132, [[TMP13]]
4412; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i64 2
4413; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE6]]
4414; CHECK:       pred.udiv.continue6:
4415; CHECK-NEXT:    [[TMP16:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP15]], [[PRED_UDIV_IF5]] ]
4416; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3
4417; CHECK-NEXT:    br i1 [[TMP17]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8]]
4418; CHECK:       pred.udiv.if7:
4419; CHECK-NEXT:    [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], -3
4420; CHECK-NEXT:    [[TMP19:%.*]] = udiv i32 219220132, [[TMP18]]
4421; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP19]], i64 3
4422; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE8]]
4423; CHECK:       pred.udiv.continue8:
4424; CHECK-NEXT:    [[TMP21]] = phi <4 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP20]], [[PRED_UDIV_IF7]] ]
4425; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4426; CHECK-NEXT:    [[TMP23]] = add <4 x i32> [[VEC_PHI]], [[TMP22]]
4427; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
4428; CHECK-NEXT:    [[TMP24:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4429; CHECK-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]]
4430; CHECK:       middle.block:
4431; CHECK-NEXT:    [[TMP25:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP23]], <4 x i32> [[VEC_PHI]]
4432; CHECK-NEXT:    [[TMP26:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
4433; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4434; CHECK:       scalar.ph:
4435; CHECK-NEXT:    br label [[BB2:%.*]]
4436; CHECK:       bb1:
4437; CHECK-NEXT:    [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
4438; CHECK-NEXT:    ret i32 [[VAR]]
4439; CHECK:       bb2:
4440; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF54:![0-9]+]], !llvm.loop [[LOOP55:![0-9]+]]
4441;
4442; UNROLL-LABEL: @sink_into_replication_region(
4443; UNROLL-NEXT:  bb:
4444; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
4445; UNROLL-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
4446; UNROLL-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
4447; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4448; UNROLL:       vector.ph:
4449; UNROLL-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
4450; UNROLL-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
4451; UNROLL-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1
4452; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
4453; UNROLL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4454; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
4455; UNROLL:       vector.body:
4456; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE19:%.*]] ]
4457; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_UDIV_CONTINUE19]] ]
4458; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_UDIV_CONTINUE19]] ]
4459; UNROLL-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_UDIV_CONTINUE19]] ]
4460; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4461; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
4462; UNROLL-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
4463; UNROLL-NEXT:    [[VEC_IV:%.*]] = or <4 x i32> [[BROADCAST_SPLAT4]], <i32 0, i32 1, i32 2, i32 3>
4464; UNROLL-NEXT:    [[VEC_IV5:%.*]] = or <4 x i32> [[BROADCAST_SPLAT4]], <i32 4, i32 5, i32 6, i32 7>
4465; UNROLL-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
4466; UNROLL-NEXT:    [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV5]], [[BROADCAST_SPLAT]]
4467; UNROLL-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
4468; UNROLL-NEXT:    br i1 [[TMP4]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4469; UNROLL:       pred.udiv.if:
4470; UNROLL-NEXT:    [[TMP5:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
4471; UNROLL-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0
4472; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4473; UNROLL:       pred.udiv.continue:
4474; UNROLL-NEXT:    [[TMP7:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
4475; UNROLL-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
4476; UNROLL-NEXT:    br i1 [[TMP8]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
4477; UNROLL:       pred.udiv.if5:
4478; UNROLL-NEXT:    [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -1
4479; UNROLL-NEXT:    [[TMP10:%.*]] = udiv i32 219220132, [[TMP9]]
4480; UNROLL-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP10]], i64 1
4481; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE7]]
4482; UNROLL:       pred.udiv.continue6:
4483; UNROLL-NEXT:    [[TMP12:%.*]] = phi <4 x i32> [ [[TMP7]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP11]], [[PRED_UDIV_IF6]] ]
4484; UNROLL-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2
4485; UNROLL-NEXT:    br i1 [[TMP13]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
4486; UNROLL:       pred.udiv.if7:
4487; UNROLL-NEXT:    [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], -2
4488; UNROLL-NEXT:    [[TMP15:%.*]] = udiv i32 219220132, [[TMP14]]
4489; UNROLL-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i64 2
4490; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE9]]
4491; UNROLL:       pred.udiv.continue8:
4492; UNROLL-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP16]], [[PRED_UDIV_IF8]] ]
4493; UNROLL-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3
4494; UNROLL-NEXT:    br i1 [[TMP18]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]]
4495; UNROLL:       pred.udiv.if9:
4496; UNROLL-NEXT:    [[TMP19:%.*]] = add i32 [[OFFSET_IDX]], -3
4497; UNROLL-NEXT:    [[TMP20:%.*]] = udiv i32 219220132, [[TMP19]]
4498; UNROLL-NEXT:    [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i64 3
4499; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE11]]
4500; UNROLL:       pred.udiv.continue10:
4501; UNROLL-NEXT:    [[TMP22:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP21]], [[PRED_UDIV_IF10]] ]
4502; UNROLL-NEXT:    [[TMP23:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
4503; UNROLL-NEXT:    br i1 [[TMP23]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]]
4504; UNROLL:       pred.udiv.if11:
4505; UNROLL-NEXT:    [[TMP24:%.*]] = add i32 [[OFFSET_IDX]], -4
4506; UNROLL-NEXT:    [[TMP25:%.*]] = udiv i32 219220132, [[TMP24]]
4507; UNROLL-NEXT:    [[TMP26:%.*]] = insertelement <4 x i32> poison, i32 [[TMP25]], i64 0
4508; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE13]]
4509; UNROLL:       pred.udiv.continue12:
4510; UNROLL-NEXT:    [[TMP27:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE11]] ], [ [[TMP26]], [[PRED_UDIV_IF12]] ]
4511; UNROLL-NEXT:    [[TMP28:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
4512; UNROLL-NEXT:    br i1 [[TMP28]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]]
4513; UNROLL:       pred.udiv.if13:
4514; UNROLL-NEXT:    [[TMP29:%.*]] = add i32 [[OFFSET_IDX]], -5
4515; UNROLL-NEXT:    [[TMP30:%.*]] = udiv i32 219220132, [[TMP29]]
4516; UNROLL-NEXT:    [[TMP31:%.*]] = insertelement <4 x i32> [[TMP27]], i32 [[TMP30]], i64 1
4517; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE15]]
4518; UNROLL:       pred.udiv.continue14:
4519; UNROLL-NEXT:    [[TMP32:%.*]] = phi <4 x i32> [ [[TMP27]], [[PRED_UDIV_CONTINUE13]] ], [ [[TMP31]], [[PRED_UDIV_IF14]] ]
4520; UNROLL-NEXT:    [[TMP33:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
4521; UNROLL-NEXT:    br i1 [[TMP33]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]]
4522; UNROLL:       pred.udiv.if15:
4523; UNROLL-NEXT:    [[TMP34:%.*]] = add i32 [[OFFSET_IDX]], -6
4524; UNROLL-NEXT:    [[TMP35:%.*]] = udiv i32 219220132, [[TMP34]]
4525; UNROLL-NEXT:    [[TMP36:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP35]], i64 2
4526; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE17]]
4527; UNROLL:       pred.udiv.continue16:
4528; UNROLL-NEXT:    [[TMP37:%.*]] = phi <4 x i32> [ [[TMP32]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP36]], [[PRED_UDIV_IF16]] ]
4529; UNROLL-NEXT:    [[TMP38:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
4530; UNROLL-NEXT:    br i1 [[TMP38]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19]]
4531; UNROLL:       pred.udiv.if17:
4532; UNROLL-NEXT:    [[TMP39:%.*]] = add i32 [[OFFSET_IDX]], -7
4533; UNROLL-NEXT:    [[TMP40:%.*]] = udiv i32 219220132, [[TMP39]]
4534; UNROLL-NEXT:    [[TMP41:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP40]], i64 3
4535; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE19]]
4536; UNROLL:       pred.udiv.continue18:
4537; UNROLL-NEXT:    [[TMP42]] = phi <4 x i32> [ [[TMP37]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP41]], [[PRED_UDIV_IF18]] ]
4538; UNROLL-NEXT:    [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4539; UNROLL-NEXT:    [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP22]], <4 x i32> [[TMP42]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4540; UNROLL-NEXT:    [[TMP45]] = add <4 x i32> [[VEC_PHI]], [[TMP43]]
4541; UNROLL-NEXT:    [[TMP46]] = add <4 x i32> [[VEC_PHI2]], [[TMP44]]
4542; UNROLL-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
4543; UNROLL-NEXT:    [[TMP47:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4544; UNROLL-NEXT:    br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]]
4545; UNROLL:       middle.block:
4546; UNROLL-NEXT:    [[TMP48:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI2]]
4547; UNROLL-NEXT:    [[TMP49:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
4548; UNROLL-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP48]], [[TMP49]]
4549; UNROLL-NEXT:    [[TMP50:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
4550; UNROLL-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4551; UNROLL:       scalar.ph:
4552; UNROLL-NEXT:    br label [[BB2:%.*]]
4553; UNROLL:       bb1:
4554; UNROLL-NEXT:    [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ]
4555; UNROLL-NEXT:    ret i32 [[VAR]]
4556; UNROLL:       bb2:
4557; UNROLL-NEXT:    br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF54:![0-9]+]], !llvm.loop [[LOOP55:![0-9]+]]
4558;
4559; UNROLL-NO-IC-LABEL: @sink_into_replication_region(
4560; UNROLL-NO-IC-NEXT:  bb:
4561; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
4562; UNROLL-NO-IC-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
4563; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
4564; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4565; UNROLL-NO-IC:       vector.ph:
4566; UNROLL-NO-IC-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
4567; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8
4568; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
4569; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
4570; UNROLL-NO-IC-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
4571; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
4572; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4573; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
4574; UNROLL-NO-IC:       vector.body:
4575; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE19:%.*]] ]
4576; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_UDIV_CONTINUE19]] ]
4577; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_UDIV_CONTINUE19]] ]
4578; UNROLL-NO-IC-NEXT:    [[VEC_PHI2:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_UDIV_CONTINUE19]] ]
4579; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4580; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT3:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
4581; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT4:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT3]], <4 x i32> poison, <4 x i32> zeroinitializer
4582; UNROLL-NO-IC-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT4]], <i32 0, i32 1, i32 2, i32 3>
4583; UNROLL-NO-IC-NEXT:    [[VEC_IV5:%.*]] = add <4 x i32> [[BROADCAST_SPLAT4]], <i32 4, i32 5, i32 6, i32 7>
4584; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
4585; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV5]], [[BROADCAST_SPLAT]]
4586; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
4587; UNROLL-NO-IC-NEXT:    br i1 [[TMP4]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4588; UNROLL-NO-IC:       pred.udiv.if:
4589; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 0
4590; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = udiv i32 219220132, [[TMP5]]
4591; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
4592; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4593; UNROLL-NO-IC:       pred.udiv.continue:
4594; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UDIV_IF]] ]
4595; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
4596; UNROLL-NO-IC-NEXT:    br i1 [[TMP9]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
4597; UNROLL-NO-IC:       pred.udiv.if5:
4598; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = add i32 [[OFFSET_IDX]], -1
4599; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = udiv i32 219220132, [[TMP10]]
4600; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1
4601; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE7]]
4602; UNROLL-NO-IC:       pred.udiv.continue6:
4603; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF6]] ]
4604; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
4605; UNROLL-NO-IC-NEXT:    br i1 [[TMP14]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
4606; UNROLL-NO-IC:       pred.udiv.if7:
4607; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], -2
4608; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = udiv i32 219220132, [[TMP15]]
4609; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2
4610; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE9]]
4611; UNROLL-NO-IC:       pred.udiv.continue8:
4612; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP17]], [[PRED_UDIV_IF8]] ]
4613; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
4614; UNROLL-NO-IC-NEXT:    br i1 [[TMP19]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]]
4615; UNROLL-NO-IC:       pred.udiv.if9:
4616; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], -3
4617; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = udiv i32 219220132, [[TMP20]]
4618; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3
4619; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE11]]
4620; UNROLL-NO-IC:       pred.udiv.continue10:
4621; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP22]], [[PRED_UDIV_IF10]] ]
4622; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
4623; UNROLL-NO-IC-NEXT:    br i1 [[TMP24]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]]
4624; UNROLL-NO-IC:       pred.udiv.if11:
4625; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], -4
4626; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = udiv i32 219220132, [[TMP25]]
4627; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
4628; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE13]]
4629; UNROLL-NO-IC:       pred.udiv.continue12:
4630; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE11]] ], [ [[TMP27]], [[PRED_UDIV_IF12]] ]
4631; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
4632; UNROLL-NO-IC-NEXT:    br i1 [[TMP29]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]]
4633; UNROLL-NO-IC:       pred.udiv.if13:
4634; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], -5
4635; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = udiv i32 219220132, [[TMP30]]
4636; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1
4637; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE15]]
4638; UNROLL-NO-IC:       pred.udiv.continue14:
4639; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_UDIV_CONTINUE13]] ], [ [[TMP32]], [[PRED_UDIV_IF14]] ]
4640; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
4641; UNROLL-NO-IC-NEXT:    br i1 [[TMP34]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]]
4642; UNROLL-NO-IC:       pred.udiv.if15:
4643; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = add i32 [[OFFSET_IDX]], -6
4644; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = udiv i32 219220132, [[TMP35]]
4645; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2
4646; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE17]]
4647; UNROLL-NO-IC:       pred.udiv.continue16:
4648; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP37]], [[PRED_UDIV_IF16]] ]
4649; UNROLL-NO-IC-NEXT:    [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
4650; UNROLL-NO-IC-NEXT:    br i1 [[TMP39]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19]]
4651; UNROLL-NO-IC:       pred.udiv.if17:
4652; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = add i32 [[OFFSET_IDX]], -7
4653; UNROLL-NO-IC-NEXT:    [[TMP41:%.*]] = udiv i32 219220132, [[TMP40]]
4654; UNROLL-NO-IC-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3
4655; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE19]]
4656; UNROLL-NO-IC:       pred.udiv.continue18:
4657; UNROLL-NO-IC-NEXT:    [[TMP43]] = phi <4 x i32> [ [[TMP38]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP42]], [[PRED_UDIV_IF18]] ]
4658; UNROLL-NO-IC-NEXT:    [[TMP44:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP23]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4659; UNROLL-NO-IC-NEXT:    [[TMP45:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> [[TMP43]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4660; UNROLL-NO-IC-NEXT:    [[TMP46]] = add <4 x i32> [[VEC_PHI]], [[TMP44]]
4661; UNROLL-NO-IC-NEXT:    [[TMP47]] = add <4 x i32> [[VEC_PHI2]], [[TMP45]]
4662; UNROLL-NO-IC-NEXT:    [[TMP48:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]]
4663; UNROLL-NO-IC-NEXT:    [[TMP49:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI2]]
4664; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
4665; UNROLL-NO-IC-NEXT:    [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4666; UNROLL-NO-IC-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]]
4667; UNROLL-NO-IC:       middle.block:
4668; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]]
4669; UNROLL-NO-IC-NEXT:    [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
4670; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP43]], i32 3
4671; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP43]], i32 2
4672; UNROLL-NO-IC-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4673; UNROLL-NO-IC:       scalar.ph:
4674; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
4675; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
4676; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
4677; UNROLL-NO-IC-NEXT:    br label [[BB2:%.*]]
4678; UNROLL-NO-IC:       bb1:
4679; UNROLL-NO-IC-NEXT:    [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
4680; UNROLL-NO-IC-NEXT:    ret i32 [[VAR]]
4681; UNROLL-NO-IC:       bb2:
4682; UNROLL-NO-IC-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4683; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
4684; UNROLL-NO-IC-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
4685; UNROLL-NO-IC-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
4686; UNROLL-NO-IC-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
4687; UNROLL-NO-IC-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
4688; UNROLL-NO-IC-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
4689; UNROLL-NO-IC-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54:![0-9]+]], !llvm.loop [[LOOP55:![0-9]+]]
4690;
4691; UNROLL-NO-VF-LABEL: @sink_into_replication_region(
4692; UNROLL-NO-VF-NEXT:  bb:
4693; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
4694; UNROLL-NO-VF-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
4695; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
4696; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4697; UNROLL-NO-VF:       vector.ph:
4698; UNROLL-NO-VF-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
4699; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
4700; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
4701; UNROLL-NO-VF-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
4702; UNROLL-NO-VF-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
4703; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
4704; UNROLL-NO-VF:       vector.body:
4705; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE5:%.*]] ]
4706; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[PRED_UDIV_CONTINUE5]] ]
4707; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[PRED_UDIV_CONTINUE5]] ]
4708; UNROLL-NO-VF-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_UDIV_CONTINUE5]] ]
4709; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4710; UNROLL-NO-VF-NEXT:    [[VEC_IV:%.*]] = add i32 [[INDEX]], 0
4711; UNROLL-NO-VF-NEXT:    [[VEC_IV3:%.*]] = add i32 [[INDEX]], 1
4712; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]]
4713; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[VEC_IV3]], [[TRIP_COUNT_MINUS_1]]
4714; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4715; UNROLL-NO-VF:       pred.udiv.if:
4716; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0
4717; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = udiv i32 219220132, [[INDUCTION]]
4718; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4719; UNROLL-NO-VF:       pred.udiv.continue:
4720; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ]
4721; UNROLL-NO-VF-NEXT:    br i1 [[TMP3]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5]]
4722; UNROLL-NO-VF:       pred.udiv.if4:
4723; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i32 [[OFFSET_IDX]], -1
4724; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION1]]
4725; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE5]]
4726; UNROLL-NO-VF:       pred.udiv.continue5:
4727; UNROLL-NO-VF-NEXT:    [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF4]] ]
4728; UNROLL-NO-VF-NEXT:    [[TMP8]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]]
4729; UNROLL-NO-VF-NEXT:    [[TMP9]] = add i32 [[VEC_PHI2]], [[TMP5]]
4730; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = select i1 [[TMP2]], i32 [[TMP8]], i32 [[VEC_PHI]]
4731; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI2]]
4732; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
4733; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4734; UNROLL-NO-VF-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF51:![0-9]+]], !llvm.loop [[LOOP52:![0-9]+]]
4735; UNROLL-NO-VF:       middle.block:
4736; UNROLL-NO-VF-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP11]], [[TMP10]]
4737; UNROLL-NO-VF-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4738; UNROLL-NO-VF:       scalar.ph:
4739; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
4740; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
4741; UNROLL-NO-VF-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
4742; UNROLL-NO-VF-NEXT:    br label [[BB2:%.*]]
4743; UNROLL-NO-VF:       bb1:
4744; UNROLL-NO-VF-NEXT:    [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
4745; UNROLL-NO-VF-NEXT:    ret i32 [[VAR]]
4746; UNROLL-NO-VF:       bb2:
4747; UNROLL-NO-VF-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4748; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
4749; UNROLL-NO-VF-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
4750; UNROLL-NO-VF-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
4751; UNROLL-NO-VF-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
4752; UNROLL-NO-VF-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
4753; UNROLL-NO-VF-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
4754; UNROLL-NO-VF-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF53:![0-9]+]], !llvm.loop [[LOOP54:![0-9]+]]
4755;
4756; SINK-AFTER-LABEL: @sink_into_replication_region(
4757; SINK-AFTER-NEXT:  bb:
4758; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
4759; SINK-AFTER-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
4760; SINK-AFTER-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
4761; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4762; SINK-AFTER:       vector.ph:
4763; SINK-AFTER-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
4764; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
4765; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
4766; SINK-AFTER-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
4767; SINK-AFTER-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
4768; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
4769; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4770; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
4771; SINK-AFTER:       vector.body:
4772; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ]
4773; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_UDIV_CONTINUE8]] ]
4774; SINK-AFTER-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_UDIV_CONTINUE8]] ]
4775; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4776; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
4777; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
4778; SINK-AFTER-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3>
4779; SINK-AFTER-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
4780; SINK-AFTER-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
4781; SINK-AFTER-NEXT:    br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4782; SINK-AFTER:       pred.udiv.if:
4783; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0
4784; SINK-AFTER-NEXT:    [[TMP5:%.*]] = udiv i32 219220132, [[TMP4]]
4785; SINK-AFTER-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
4786; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4787; SINK-AFTER:       pred.udiv.continue:
4788; SINK-AFTER-NEXT:    [[TMP7:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
4789; SINK-AFTER-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
4790; SINK-AFTER-NEXT:    br i1 [[TMP8]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
4791; SINK-AFTER:       pred.udiv.if3:
4792; SINK-AFTER-NEXT:    [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -1
4793; SINK-AFTER-NEXT:    [[TMP10:%.*]] = udiv i32 219220132, [[TMP9]]
4794; SINK-AFTER-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP10]], i32 1
4795; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE4]]
4796; SINK-AFTER:       pred.udiv.continue4:
4797; SINK-AFTER-NEXT:    [[TMP12:%.*]] = phi <4 x i32> [ [[TMP7]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP11]], [[PRED_UDIV_IF3]] ]
4798; SINK-AFTER-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
4799; SINK-AFTER-NEXT:    br i1 [[TMP13]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
4800; SINK-AFTER:       pred.udiv.if5:
4801; SINK-AFTER-NEXT:    [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], -2
4802; SINK-AFTER-NEXT:    [[TMP15:%.*]] = udiv i32 219220132, [[TMP14]]
4803; SINK-AFTER-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i32 2
4804; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE6]]
4805; SINK-AFTER:       pred.udiv.continue6:
4806; SINK-AFTER-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP16]], [[PRED_UDIV_IF5]] ]
4807; SINK-AFTER-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
4808; SINK-AFTER-NEXT:    br i1 [[TMP18]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8]]
4809; SINK-AFTER:       pred.udiv.if7:
4810; SINK-AFTER-NEXT:    [[TMP19:%.*]] = add i32 [[OFFSET_IDX]], -3
4811; SINK-AFTER-NEXT:    [[TMP20:%.*]] = udiv i32 219220132, [[TMP19]]
4812; SINK-AFTER-NEXT:    [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i32 3
4813; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE8]]
4814; SINK-AFTER:       pred.udiv.continue8:
4815; SINK-AFTER-NEXT:    [[TMP22]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP21]], [[PRED_UDIV_IF7]] ]
4816; SINK-AFTER-NEXT:    [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4817; SINK-AFTER-NEXT:    [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]]
4818; SINK-AFTER-NEXT:    [[TMP25:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
4819; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
4820; SINK-AFTER-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4821; SINK-AFTER-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52:![0-9]+]], !llvm.loop [[LOOP53:![0-9]+]]
4822; SINK-AFTER:       middle.block:
4823; SINK-AFTER-NEXT:    [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
4824; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3
4825; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP22]], i32 2
4826; SINK-AFTER-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4827; SINK-AFTER:       scalar.ph:
4828; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
4829; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
4830; SINK-AFTER-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
4831; SINK-AFTER-NEXT:    br label [[BB2:%.*]]
4832; SINK-AFTER:       bb1:
4833; SINK-AFTER-NEXT:    [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
4834; SINK-AFTER-NEXT:    ret i32 [[VAR]]
4835; SINK-AFTER:       bb2:
4836; SINK-AFTER-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4837; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
4838; SINK-AFTER-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
4839; SINK-AFTER-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
4840; SINK-AFTER-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
4841; SINK-AFTER-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
4842; SINK-AFTER-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
4843; SINK-AFTER-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54:![0-9]+]], !llvm.loop [[LOOP55:![0-9]+]]
4844;
4845bb:
4846  br label %bb2
4847
4848  bb1:                                              ; preds = %bb2
4849  %var = phi i32 [ %var6, %bb2 ]
4850  ret i32 %var
4851
4852  bb2:                                              ; preds = %bb2, %bb
4853  %var3 = phi i32 [ %var8, %bb2 ], [ %y, %bb ]
4854  %var4 = phi i32 [ %var7, %bb2 ], [ 0, %bb ]
4855  %var5 = phi i32 [ %var6, %bb2 ], [ 0, %bb ]
4856  %var6 = add i32 %var5, %var4
4857  %var7 = udiv i32 219220132, %var3
4858  %var8 = add nsw i32 %var3, -1
4859  %var9 = icmp slt i32 %var3, 2
4860  br i1 %var9, label %bb1, label %bb2, !prof !2
4861}
4862
4863define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
4864;
4865; CHECK-LABEL: @sink_into_replication_region_multiple(
4866; CHECK-NEXT:  bb:
4867; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
4868; CHECK-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
4869; CHECK-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
4870; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4871; CHECK:       vector.ph:
4872; CHECK-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
4873; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4
4874; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1
4875; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
4876; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4877; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
4878; CHECK:       vector.body:
4879; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ]
4880; CHECK-NEXT:    [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[PRED_STORE_CONTINUE15]] ]
4881; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[PRED_STORE_CONTINUE15]] ]
4882; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[PRED_STORE_CONTINUE15]] ]
4883; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4884; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], -1
4885; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -2
4886; CHECK-NEXT:    [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -3
4887; CHECK-NEXT:    [[TMP5:%.*]] = icmp ule <4 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT]]
4888; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0
4889; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4890; CHECK:       pred.udiv.if:
4891; CHECK-NEXT:    [[TMP7:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
4892; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <4 x i32> poison, i32 [[TMP7]], i64 0
4893; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4894; CHECK:       pred.udiv.continue:
4895; CHECK-NEXT:    [[TMP9:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP8]], [[PRED_UDIV_IF]] ]
4896; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1
4897; CHECK-NEXT:    br i1 [[TMP10]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
4898; CHECK:       pred.udiv.if2:
4899; CHECK-NEXT:    [[TMP11:%.*]] = udiv i32 219220132, [[TMP2]]
4900; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP9]], i32 [[TMP11]], i64 1
4901; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE5]]
4902; CHECK:       pred.udiv.continue3:
4903; CHECK-NEXT:    [[TMP13:%.*]] = phi <4 x i32> [ [[TMP9]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF4]] ]
4904; CHECK-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2
4905; CHECK-NEXT:    br i1 [[TMP14]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
4906; CHECK:       pred.udiv.if4:
4907; CHECK-NEXT:    [[TMP15:%.*]] = udiv i32 219220132, [[TMP3]]
4908; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP15]], i64 2
4909; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE7]]
4910; CHECK:       pred.udiv.continue5:
4911; CHECK-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP16]], [[PRED_UDIV_IF6]] ]
4912; CHECK-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3
4913; CHECK-NEXT:    br i1 [[TMP18]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
4914; CHECK:       pred.udiv.if6:
4915; CHECK-NEXT:    [[TMP19:%.*]] = udiv i32 219220132, [[TMP4]]
4916; CHECK-NEXT:    [[TMP20:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP19]], i64 3
4917; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE9]]
4918; CHECK:       pred.udiv.continue7:
4919; CHECK-NEXT:    [[TMP21]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP20]], [[PRED_UDIV_IF8]] ]
4920; CHECK-NEXT:    [[TMP22:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4921; CHECK-NEXT:    [[TMP23]] = add <4 x i32> [[VEC_PHI]], [[TMP22]]
4922; CHECK-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0
4923; CHECK-NEXT:    br i1 [[TMP24]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
4924; CHECK:       pred.store.if:
4925; CHECK-NEXT:    [[TMP25:%.*]] = sext i32 [[INDEX]] to i64
4926; CHECK-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 [[TMP25]]
4927; CHECK-NEXT:    store i32 [[OFFSET_IDX]], i32* [[TMP26]], align 4
4928; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
4929; CHECK:       pred.store.continue:
4930; CHECK-NEXT:    [[TMP27:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1
4931; CHECK-NEXT:    br i1 [[TMP27]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
4932; CHECK:       pred.store.if8:
4933; CHECK-NEXT:    [[TMP28:%.*]] = or i32 [[INDEX]], 1
4934; CHECK-NEXT:    [[TMP29:%.*]] = sext i32 [[TMP28]] to i64
4935; CHECK-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP29]]
4936; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP30]], align 4
4937; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE11]]
4938; CHECK:       pred.store.continue9:
4939; CHECK-NEXT:    [[TMP31:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2
4940; CHECK-NEXT:    br i1 [[TMP31]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
4941; CHECK:       pred.store.if10:
4942; CHECK-NEXT:    [[TMP32:%.*]] = or i32 [[INDEX]], 2
4943; CHECK-NEXT:    [[TMP33:%.*]] = sext i32 [[TMP32]] to i64
4944; CHECK-NEXT:    [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP33]]
4945; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP34]], align 4
4946; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE13]]
4947; CHECK:       pred.store.continue11:
4948; CHECK-NEXT:    [[TMP35:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3
4949; CHECK-NEXT:    br i1 [[TMP35]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
4950; CHECK:       pred.store.if12:
4951; CHECK-NEXT:    [[TMP36:%.*]] = or i32 [[INDEX]], 3
4952; CHECK-NEXT:    [[TMP37:%.*]] = sext i32 [[TMP36]] to i64
4953; CHECK-NEXT:    [[TMP38:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP37]]
4954; CHECK-NEXT:    store i32 [[TMP4]], i32* [[TMP38]], align 4
4955; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE15]]
4956; CHECK:       pred.store.continue13:
4957; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
4958; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
4959; CHECK-NEXT:    [[TMP39:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4960; CHECK-NEXT:    br i1 [[TMP39]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]]
4961; CHECK:       middle.block:
4962; CHECK-NEXT:    [[TMP40:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP23]], <4 x i32> [[VEC_PHI]]
4963; CHECK-NEXT:    [[TMP41:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP40]])
4964; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4965; CHECK:       scalar.ph:
4966; CHECK-NEXT:    br label [[BB2:%.*]]
4967; CHECK:       bb1:
4968; CHECK-NEXT:    [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP41]], [[MIDDLE_BLOCK]] ]
4969; CHECK-NEXT:    ret i32 [[VAR]]
4970; CHECK:       bb2:
4971; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF54]], !llvm.loop [[LOOP57:![0-9]+]]
4972;
4973; UNROLL-LABEL: @sink_into_replication_region_multiple(
4974; UNROLL-NEXT:  bb:
4975; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
4976; UNROLL-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
4977; UNROLL-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
4978; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4979; UNROLL:       vector.ph:
4980; UNROLL-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
4981; UNROLL-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
4982; UNROLL-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add i32 [[TMP1]], -1
4983; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
4984; UNROLL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4985; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
4986; UNROLL:       vector.body:
4987; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE35:%.*]] ]
4988; UNROLL-NEXT:    [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[PRED_STORE_CONTINUE35]] ]
4989; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[PRED_STORE_CONTINUE35]] ]
4990; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_STORE_CONTINUE35]] ]
4991; UNROLL-NEXT:    [[VEC_PHI7:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_STORE_CONTINUE35]] ]
4992; UNROLL-NEXT:    [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], <i32 4, i32 4, i32 4, i32 4>
4993; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4994; UNROLL-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], -1
4995; UNROLL-NEXT:    [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -2
4996; UNROLL-NEXT:    [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -3
4997; UNROLL-NEXT:    [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -4
4998; UNROLL-NEXT:    [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], -5
4999; UNROLL-NEXT:    [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -6
5000; UNROLL-NEXT:    [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -7
5001; UNROLL-NEXT:    [[TMP9:%.*]] = icmp ule <4 x i32> [[VEC_IND3]], [[BROADCAST_SPLAT]]
5002; UNROLL-NEXT:    [[TMP10:%.*]] = icmp ule <4 x i32> [[STEP_ADD4]], [[BROADCAST_SPLAT]]
5003; UNROLL-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[TMP9]], i64 0
5004; UNROLL-NEXT:    br i1 [[TMP11]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
5005; UNROLL:       pred.udiv.if:
5006; UNROLL-NEXT:    [[TMP12:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
5007; UNROLL-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> poison, i32 [[TMP12]], i64 0
5008; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE]]
5009; UNROLL:       pred.udiv.continue:
5010; UNROLL-NEXT:    [[TMP14:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP13]], [[PRED_UDIV_IF]] ]
5011; UNROLL-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[TMP9]], i64 1
5012; UNROLL-NEXT:    br i1 [[TMP15]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
5013; UNROLL:       pred.udiv.if4:
5014; UNROLL-NEXT:    [[TMP16:%.*]] = udiv i32 219220132, [[TMP2]]
5015; UNROLL-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP16]], i64 1
5016; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE9]]
5017; UNROLL:       pred.udiv.continue5:
5018; UNROLL-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP17]], [[PRED_UDIV_IF8]] ]
5019; UNROLL-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP9]], i64 2
5020; UNROLL-NEXT:    br i1 [[TMP19]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]]
5021; UNROLL:       pred.udiv.if6:
5022; UNROLL-NEXT:    [[TMP20:%.*]] = udiv i32 219220132, [[TMP3]]
5023; UNROLL-NEXT:    [[TMP21:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP20]], i64 2
5024; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE11]]
5025; UNROLL:       pred.udiv.continue7:
5026; UNROLL-NEXT:    [[TMP22:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP21]], [[PRED_UDIV_IF10]] ]
5027; UNROLL-NEXT:    [[TMP23:%.*]] = extractelement <4 x i1> [[TMP9]], i64 3
5028; UNROLL-NEXT:    br i1 [[TMP23]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]]
5029; UNROLL:       pred.udiv.if8:
5030; UNROLL-NEXT:    [[TMP24:%.*]] = udiv i32 219220132, [[TMP4]]
5031; UNROLL-NEXT:    [[TMP25:%.*]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP24]], i64 3
5032; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE13]]
5033; UNROLL:       pred.udiv.continue9:
5034; UNROLL-NEXT:    [[TMP26:%.*]] = phi <4 x i32> [ [[TMP22]], [[PRED_UDIV_CONTINUE11]] ], [ [[TMP25]], [[PRED_UDIV_IF12]] ]
5035; UNROLL-NEXT:    [[TMP27:%.*]] = extractelement <4 x i1> [[TMP10]], i64 0
5036; UNROLL-NEXT:    br i1 [[TMP27]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]]
5037; UNROLL:       pred.udiv.if10:
5038; UNROLL-NEXT:    [[TMP28:%.*]] = udiv i32 219220132, [[TMP5]]
5039; UNROLL-NEXT:    [[TMP29:%.*]] = insertelement <4 x i32> poison, i32 [[TMP28]], i64 0
5040; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE15]]
5041; UNROLL:       pred.udiv.continue11:
5042; UNROLL-NEXT:    [[TMP30:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE13]] ], [ [[TMP29]], [[PRED_UDIV_IF14]] ]
5043; UNROLL-NEXT:    [[TMP31:%.*]] = extractelement <4 x i1> [[TMP10]], i64 1
5044; UNROLL-NEXT:    br i1 [[TMP31]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]]
5045; UNROLL:       pred.udiv.if12:
5046; UNROLL-NEXT:    [[TMP32:%.*]] = udiv i32 219220132, [[TMP6]]
5047; UNROLL-NEXT:    [[TMP33:%.*]] = insertelement <4 x i32> [[TMP30]], i32 [[TMP32]], i64 1
5048; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE17]]
5049; UNROLL:       pred.udiv.continue13:
5050; UNROLL-NEXT:    [[TMP34:%.*]] = phi <4 x i32> [ [[TMP30]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP33]], [[PRED_UDIV_IF16]] ]
5051; UNROLL-NEXT:    [[TMP35:%.*]] = extractelement <4 x i1> [[TMP10]], i64 2
5052; UNROLL-NEXT:    br i1 [[TMP35]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19:%.*]]
5053; UNROLL:       pred.udiv.if14:
5054; UNROLL-NEXT:    [[TMP36:%.*]] = udiv i32 219220132, [[TMP7]]
5055; UNROLL-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP34]], i32 [[TMP36]], i64 2
5056; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE19]]
5057; UNROLL:       pred.udiv.continue15:
5058; UNROLL-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP34]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP37]], [[PRED_UDIV_IF18]] ]
5059; UNROLL-NEXT:    [[TMP39:%.*]] = extractelement <4 x i1> [[TMP10]], i64 3
5060; UNROLL-NEXT:    br i1 [[TMP39]], label [[PRED_UDIV_IF20:%.*]], label [[PRED_UDIV_CONTINUE21:%.*]]
5061; UNROLL:       pred.udiv.if16:
5062; UNROLL-NEXT:    [[TMP40:%.*]] = udiv i32 219220132, [[TMP8]]
5063; UNROLL-NEXT:    [[TMP41:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP40]], i64 3
5064; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE21]]
5065; UNROLL:       pred.udiv.continue17:
5066; UNROLL-NEXT:    [[TMP42]] = phi <4 x i32> [ [[TMP38]], [[PRED_UDIV_CONTINUE19]] ], [ [[TMP41]], [[PRED_UDIV_IF20]] ]
5067; UNROLL-NEXT:    [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP26]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
5068; UNROLL-NEXT:    [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP26]], <4 x i32> [[TMP42]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
5069; UNROLL-NEXT:    [[TMP45]] = add <4 x i32> [[VEC_PHI]], [[TMP43]]
5070; UNROLL-NEXT:    [[TMP46]] = add <4 x i32> [[VEC_PHI7]], [[TMP44]]
5071; UNROLL-NEXT:    [[TMP47:%.*]] = extractelement <4 x i1> [[TMP9]], i64 0
5072; UNROLL-NEXT:    br i1 [[TMP47]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
5073; UNROLL:       pred.store.if:
5074; UNROLL-NEXT:    [[TMP48:%.*]] = sext i32 [[INDEX]] to i64
5075; UNROLL-NEXT:    [[TMP49:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 [[TMP48]]
5076; UNROLL-NEXT:    store i32 [[OFFSET_IDX]], i32* [[TMP49]], align 4
5077; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE]]
5078; UNROLL:       pred.store.continue:
5079; UNROLL-NEXT:    [[TMP50:%.*]] = extractelement <4 x i1> [[TMP9]], i64 1
5080; UNROLL-NEXT:    br i1 [[TMP50]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
5081; UNROLL:       pred.store.if18:
5082; UNROLL-NEXT:    [[TMP51:%.*]] = or i32 [[INDEX]], 1
5083; UNROLL-NEXT:    [[TMP52:%.*]] = sext i32 [[TMP51]] to i64
5084; UNROLL-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP52]]
5085; UNROLL-NEXT:    store i32 [[TMP2]], i32* [[TMP53]], align 4
5086; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE23]]
5087; UNROLL:       pred.store.continue19:
5088; UNROLL-NEXT:    [[TMP54:%.*]] = extractelement <4 x i1> [[TMP9]], i64 2
5089; UNROLL-NEXT:    br i1 [[TMP54]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]]
5090; UNROLL:       pred.store.if20:
5091; UNROLL-NEXT:    [[TMP55:%.*]] = or i32 [[INDEX]], 2
5092; UNROLL-NEXT:    [[TMP56:%.*]] = sext i32 [[TMP55]] to i64
5093; UNROLL-NEXT:    [[TMP57:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP56]]
5094; UNROLL-NEXT:    store i32 [[TMP3]], i32* [[TMP57]], align 4
5095; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE25]]
5096; UNROLL:       pred.store.continue21:
5097; UNROLL-NEXT:    [[TMP58:%.*]] = extractelement <4 x i1> [[TMP9]], i64 3
5098; UNROLL-NEXT:    br i1 [[TMP58]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]]
5099; UNROLL:       pred.store.if22:
5100; UNROLL-NEXT:    [[TMP59:%.*]] = or i32 [[INDEX]], 3
5101; UNROLL-NEXT:    [[TMP60:%.*]] = sext i32 [[TMP59]] to i64
5102; UNROLL-NEXT:    [[TMP61:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP60]]
5103; UNROLL-NEXT:    store i32 [[TMP4]], i32* [[TMP61]], align 4
5104; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE27]]
5105; UNROLL:       pred.store.continue23:
5106; UNROLL-NEXT:    [[TMP62:%.*]] = extractelement <4 x i1> [[TMP10]], i64 0
5107; UNROLL-NEXT:    br i1 [[TMP62]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]]
5108; UNROLL:       pred.store.if24:
5109; UNROLL-NEXT:    [[TMP63:%.*]] = or i32 [[INDEX]], 4
5110; UNROLL-NEXT:    [[TMP64:%.*]] = sext i32 [[TMP63]] to i64
5111; UNROLL-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP64]]
5112; UNROLL-NEXT:    store i32 [[TMP5]], i32* [[TMP65]], align 4
5113; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE29]]
5114; UNROLL:       pred.store.continue25:
5115; UNROLL-NEXT:    [[TMP66:%.*]] = extractelement <4 x i1> [[TMP10]], i64 1
5116; UNROLL-NEXT:    br i1 [[TMP66]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]]
5117; UNROLL:       pred.store.if26:
5118; UNROLL-NEXT:    [[TMP67:%.*]] = or i32 [[INDEX]], 5
5119; UNROLL-NEXT:    [[TMP68:%.*]] = sext i32 [[TMP67]] to i64
5120; UNROLL-NEXT:    [[TMP69:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP68]]
5121; UNROLL-NEXT:    store i32 [[TMP6]], i32* [[TMP69]], align 4
5122; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE31]]
5123; UNROLL:       pred.store.continue27:
5124; UNROLL-NEXT:    [[TMP70:%.*]] = extractelement <4 x i1> [[TMP10]], i64 2
5125; UNROLL-NEXT:    br i1 [[TMP70]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]]
5126; UNROLL:       pred.store.if28:
5127; UNROLL-NEXT:    [[TMP71:%.*]] = or i32 [[INDEX]], 6
5128; UNROLL-NEXT:    [[TMP72:%.*]] = sext i32 [[TMP71]] to i64
5129; UNROLL-NEXT:    [[TMP73:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP72]]
5130; UNROLL-NEXT:    store i32 [[TMP7]], i32* [[TMP73]], align 4
5131; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE33]]
5132; UNROLL:       pred.store.continue29:
5133; UNROLL-NEXT:    [[TMP74:%.*]] = extractelement <4 x i1> [[TMP10]], i64 3
5134; UNROLL-NEXT:    br i1 [[TMP74]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35]]
5135; UNROLL:       pred.store.if30:
5136; UNROLL-NEXT:    [[TMP75:%.*]] = or i32 [[INDEX]], 7
5137; UNROLL-NEXT:    [[TMP76:%.*]] = sext i32 [[TMP75]] to i64
5138; UNROLL-NEXT:    [[TMP77:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP76]]
5139; UNROLL-NEXT:    store i32 [[TMP8]], i32* [[TMP77]], align 4
5140; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE35]]
5141; UNROLL:       pred.store.continue31:
5142; UNROLL-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
5143; UNROLL-NEXT:    [[VEC_IND_NEXT6]] = add <4 x i32> [[VEC_IND3]], <i32 8, i32 8, i32 8, i32 8>
5144; UNROLL-NEXT:    [[TMP78:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5145; UNROLL-NEXT:    br i1 [[TMP78]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]]
5146; UNROLL:       middle.block:
5147; UNROLL-NEXT:    [[TMP79:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI7]]
5148; UNROLL-NEXT:    [[TMP80:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI]]
5149; UNROLL-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP79]], [[TMP80]]
5150; UNROLL-NEXT:    [[TMP81:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
5151; UNROLL-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
5152; UNROLL:       scalar.ph:
5153; UNROLL-NEXT:    br label [[BB2:%.*]]
5154; UNROLL:       bb1:
5155; UNROLL-NEXT:    [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP81]], [[MIDDLE_BLOCK]] ]
5156; UNROLL-NEXT:    ret i32 [[VAR]]
5157; UNROLL:       bb2:
5158; UNROLL-NEXT:    br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF54]], !llvm.loop [[LOOP57:![0-9]+]]
5159;
5160; UNROLL-NO-IC-LABEL: @sink_into_replication_region_multiple(
5161; UNROLL-NO-IC-NEXT:  bb:
5162; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
5163; UNROLL-NO-IC-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
5164; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
5165; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5166; UNROLL-NO-IC:       vector.ph:
5167; UNROLL-NO-IC-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
5168; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8
5169; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
5170; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
5171; UNROLL-NO-IC-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
5172; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
5173; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
5174; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
5175; UNROLL-NO-IC:       vector.body:
5176; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE35:%.*]] ]
5177; UNROLL-NO-IC-NEXT:    [[VEC_IND3:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT6:%.*]], [[PRED_STORE_CONTINUE35]] ]
5178; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_STORE_CONTINUE35]] ]
5179; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_STORE_CONTINUE35]] ]
5180; UNROLL-NO-IC-NEXT:    [[VEC_PHI7:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_STORE_CONTINUE35]] ]
5181; UNROLL-NO-IC-NEXT:    [[STEP_ADD4:%.*]] = add <4 x i32> [[VEC_IND3]], <i32 4, i32 4, i32 4, i32 4>
5182; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
5183; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
5184; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
5185; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2
5186; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3
5187; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], -4
5188; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -5
5189; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -6
5190; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -7
5191; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = icmp ule <4 x i32> [[VEC_IND3]], [[BROADCAST_SPLAT]]
5192; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = icmp ule <4 x i32> [[STEP_ADD4]], [[BROADCAST_SPLAT]]
5193; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
5194; UNROLL-NO-IC-NEXT:    br i1 [[TMP12]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
5195; UNROLL-NO-IC:       pred.udiv.if:
5196; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = udiv i32 219220132, [[TMP2]]
5197; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> poison, i32 [[TMP13]], i32 0
5198; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE]]
5199; UNROLL-NO-IC:       pred.udiv.continue:
5200; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP14]], [[PRED_UDIV_IF]] ]
5201; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1
5202; UNROLL-NO-IC-NEXT:    br i1 [[TMP16]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
5203; UNROLL-NO-IC:       pred.udiv.if4:
5204; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = udiv i32 219220132, [[TMP3]]
5205; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP17]], i32 1
5206; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE9]]
5207; UNROLL-NO-IC:       pred.udiv.continue5:
5208; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP15]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP18]], [[PRED_UDIV_IF8]] ]
5209; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP10]], i32 2
5210; UNROLL-NO-IC-NEXT:    br i1 [[TMP20]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]]
5211; UNROLL-NO-IC:       pred.udiv.if6:
5212; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = udiv i32 219220132, [[TMP4]]
5213; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP21]], i32 2
5214; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE11]]
5215; UNROLL-NO-IC:       pred.udiv.continue7:
5216; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_UDIV_CONTINUE9]] ], [ [[TMP22]], [[PRED_UDIV_IF10]] ]
5217; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP10]], i32 3
5218; UNROLL-NO-IC-NEXT:    br i1 [[TMP24]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]]
5219; UNROLL-NO-IC:       pred.udiv.if8:
5220; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = udiv i32 219220132, [[TMP5]]
5221; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = insertelement <4 x i32> [[TMP23]], i32 [[TMP25]], i32 3
5222; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE13]]
5223; UNROLL-NO-IC:       pred.udiv.continue9:
5224; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = phi <4 x i32> [ [[TMP23]], [[PRED_UDIV_CONTINUE11]] ], [ [[TMP26]], [[PRED_UDIV_IF12]] ]
5225; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0
5226; UNROLL-NO-IC-NEXT:    br i1 [[TMP28]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]]
5227; UNROLL-NO-IC:       pred.udiv.if10:
5228; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = udiv i32 219220132, [[TMP6]]
5229; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP29]], i32 0
5230; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE15]]
5231; UNROLL-NO-IC:       pred.udiv.continue11:
5232; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE13]] ], [ [[TMP30]], [[PRED_UDIV_IF14]] ]
5233; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = extractelement <4 x i1> [[TMP11]], i32 1
5234; UNROLL-NO-IC-NEXT:    br i1 [[TMP32]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]]
5235; UNROLL-NO-IC:       pred.udiv.if12:
5236; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = udiv i32 219220132, [[TMP7]]
5237; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP33]], i32 1
5238; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE17]]
5239; UNROLL-NO-IC:       pred.udiv.continue13:
5240; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = phi <4 x i32> [ [[TMP31]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP34]], [[PRED_UDIV_IF16]] ]
5241; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = extractelement <4 x i1> [[TMP11]], i32 2
5242; UNROLL-NO-IC-NEXT:    br i1 [[TMP36]], label [[PRED_UDIV_IF18:%.*]], label [[PRED_UDIV_CONTINUE19:%.*]]
5243; UNROLL-NO-IC:       pred.udiv.if14:
5244; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = udiv i32 219220132, [[TMP8]]
5245; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = insertelement <4 x i32> [[TMP35]], i32 [[TMP37]], i32 2
5246; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE19]]
5247; UNROLL-NO-IC:       pred.udiv.continue15:
5248; UNROLL-NO-IC-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP35]], [[PRED_UDIV_CONTINUE17]] ], [ [[TMP38]], [[PRED_UDIV_IF18]] ]
5249; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = extractelement <4 x i1> [[TMP11]], i32 3
5250; UNROLL-NO-IC-NEXT:    br i1 [[TMP40]], label [[PRED_UDIV_IF20:%.*]], label [[PRED_UDIV_CONTINUE21:%.*]]
5251; UNROLL-NO-IC:       pred.udiv.if16:
5252; UNROLL-NO-IC-NEXT:    [[TMP41:%.*]] = udiv i32 219220132, [[TMP9]]
5253; UNROLL-NO-IC-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP41]], i32 3
5254; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE21]]
5255; UNROLL-NO-IC:       pred.udiv.continue17:
5256; UNROLL-NO-IC-NEXT:    [[TMP43]] = phi <4 x i32> [ [[TMP39]], [[PRED_UDIV_CONTINUE19]] ], [ [[TMP42]], [[PRED_UDIV_IF20]] ]
5257; UNROLL-NO-IC-NEXT:    [[TMP44:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP27]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
5258; UNROLL-NO-IC-NEXT:    [[TMP45:%.*]] = shufflevector <4 x i32> [[TMP27]], <4 x i32> [[TMP43]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
5259; UNROLL-NO-IC-NEXT:    [[TMP46]] = add <4 x i32> [[VEC_PHI]], [[TMP44]]
5260; UNROLL-NO-IC-NEXT:    [[TMP47]] = add <4 x i32> [[VEC_PHI7]], [[TMP45]]
5261; UNROLL-NO-IC-NEXT:    [[TMP48:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
5262; UNROLL-NO-IC-NEXT:    br i1 [[TMP48]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
5263; UNROLL-NO-IC:       pred.store.if:
5264; UNROLL-NO-IC-NEXT:    [[TMP49:%.*]] = add i32 [[INDEX]], 0
5265; UNROLL-NO-IC-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP49]]
5266; UNROLL-NO-IC-NEXT:    store i32 [[TMP2]], i32* [[TMP50]], align 4
5267; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE]]
5268; UNROLL-NO-IC:       pred.store.continue:
5269; UNROLL-NO-IC-NEXT:    [[TMP51:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1
5270; UNROLL-NO-IC-NEXT:    br i1 [[TMP51]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
5271; UNROLL-NO-IC:       pred.store.if18:
5272; UNROLL-NO-IC-NEXT:    [[TMP52:%.*]] = add i32 [[INDEX]], 1
5273; UNROLL-NO-IC-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP52]]
5274; UNROLL-NO-IC-NEXT:    store i32 [[TMP3]], i32* [[TMP53]], align 4
5275; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE23]]
5276; UNROLL-NO-IC:       pred.store.continue19:
5277; UNROLL-NO-IC-NEXT:    [[TMP54:%.*]] = extractelement <4 x i1> [[TMP10]], i32 2
5278; UNROLL-NO-IC-NEXT:    br i1 [[TMP54]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]]
5279; UNROLL-NO-IC:       pred.store.if20:
5280; UNROLL-NO-IC-NEXT:    [[TMP55:%.*]] = add i32 [[INDEX]], 2
5281; UNROLL-NO-IC-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP55]]
5282; UNROLL-NO-IC-NEXT:    store i32 [[TMP4]], i32* [[TMP56]], align 4
5283; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE25]]
5284; UNROLL-NO-IC:       pred.store.continue21:
5285; UNROLL-NO-IC-NEXT:    [[TMP57:%.*]] = extractelement <4 x i1> [[TMP10]], i32 3
5286; UNROLL-NO-IC-NEXT:    br i1 [[TMP57]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]]
5287; UNROLL-NO-IC:       pred.store.if22:
5288; UNROLL-NO-IC-NEXT:    [[TMP58:%.*]] = add i32 [[INDEX]], 3
5289; UNROLL-NO-IC-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP58]]
5290; UNROLL-NO-IC-NEXT:    store i32 [[TMP5]], i32* [[TMP59]], align 4
5291; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE27]]
5292; UNROLL-NO-IC:       pred.store.continue23:
5293; UNROLL-NO-IC-NEXT:    [[TMP60:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0
5294; UNROLL-NO-IC-NEXT:    br i1 [[TMP60]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]]
5295; UNROLL-NO-IC:       pred.store.if24:
5296; UNROLL-NO-IC-NEXT:    [[TMP61:%.*]] = add i32 [[INDEX]], 4
5297; UNROLL-NO-IC-NEXT:    [[TMP62:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP61]]
5298; UNROLL-NO-IC-NEXT:    store i32 [[TMP6]], i32* [[TMP62]], align 4
5299; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE29]]
5300; UNROLL-NO-IC:       pred.store.continue25:
5301; UNROLL-NO-IC-NEXT:    [[TMP63:%.*]] = extractelement <4 x i1> [[TMP11]], i32 1
5302; UNROLL-NO-IC-NEXT:    br i1 [[TMP63]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31:%.*]]
5303; UNROLL-NO-IC:       pred.store.if26:
5304; UNROLL-NO-IC-NEXT:    [[TMP64:%.*]] = add i32 [[INDEX]], 5
5305; UNROLL-NO-IC-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP64]]
5306; UNROLL-NO-IC-NEXT:    store i32 [[TMP7]], i32* [[TMP65]], align 4
5307; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE31]]
5308; UNROLL-NO-IC:       pred.store.continue27:
5309; UNROLL-NO-IC-NEXT:    [[TMP66:%.*]] = extractelement <4 x i1> [[TMP11]], i32 2
5310; UNROLL-NO-IC-NEXT:    br i1 [[TMP66]], label [[PRED_STORE_IF32:%.*]], label [[PRED_STORE_CONTINUE33:%.*]]
5311; UNROLL-NO-IC:       pred.store.if28:
5312; UNROLL-NO-IC-NEXT:    [[TMP67:%.*]] = add i32 [[INDEX]], 6
5313; UNROLL-NO-IC-NEXT:    [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP67]]
5314; UNROLL-NO-IC-NEXT:    store i32 [[TMP8]], i32* [[TMP68]], align 4
5315; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE33]]
5316; UNROLL-NO-IC:       pred.store.continue29:
5317; UNROLL-NO-IC-NEXT:    [[TMP69:%.*]] = extractelement <4 x i1> [[TMP11]], i32 3
5318; UNROLL-NO-IC-NEXT:    br i1 [[TMP69]], label [[PRED_STORE_IF34:%.*]], label [[PRED_STORE_CONTINUE35]]
5319; UNROLL-NO-IC:       pred.store.if30:
5320; UNROLL-NO-IC-NEXT:    [[TMP70:%.*]] = add i32 [[INDEX]], 7
5321; UNROLL-NO-IC-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP70]]
5322; UNROLL-NO-IC-NEXT:    store i32 [[TMP9]], i32* [[TMP71]], align 4
5323; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE35]]
5324; UNROLL-NO-IC:       pred.store.continue31:
5325; UNROLL-NO-IC-NEXT:    [[TMP72:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]]
5326; UNROLL-NO-IC-NEXT:    [[TMP73:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI7]]
5327; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
5328; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT6]] = add <4 x i32> [[STEP_ADD4]], <i32 4, i32 4, i32 4, i32 4>
5329; UNROLL-NO-IC-NEXT:    [[TMP74:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5330; UNROLL-NO-IC-NEXT:    br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]]
5331; UNROLL-NO-IC:       middle.block:
5332; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP73]], [[TMP72]]
5333; UNROLL-NO-IC-NEXT:    [[TMP75:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
5334; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP43]], i32 3
5335; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP43]], i32 2
5336; UNROLL-NO-IC-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
5337; UNROLL-NO-IC:       scalar.ph:
5338; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5339; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
5340; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
5341; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP75]], [[MIDDLE_BLOCK]] ]
5342; UNROLL-NO-IC-NEXT:    br label [[BB2:%.*]]
5343; UNROLL-NO-IC:       bb1:
5344; UNROLL-NO-IC-NEXT:    [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP75]], [[MIDDLE_BLOCK]] ]
5345; UNROLL-NO-IC-NEXT:    ret i32 [[VAR]]
5346; UNROLL-NO-IC:       bb2:
5347; UNROLL-NO-IC-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5348; UNROLL-NO-IC-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5349; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
5350; UNROLL-NO-IC-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
5351; UNROLL-NO-IC-NEXT:    [[G:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[IV]]
5352; UNROLL-NO-IC-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
5353; UNROLL-NO-IC-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
5354; UNROLL-NO-IC-NEXT:    store i32 [[VAR3]], i32* [[G]], align 4
5355; UNROLL-NO-IC-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
5356; UNROLL-NO-IC-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
5357; UNROLL-NO-IC-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
5358; UNROLL-NO-IC-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54]], !llvm.loop [[LOOP57:![0-9]+]]
5359;
5360; UNROLL-NO-VF-LABEL: @sink_into_replication_region_multiple(
5361; UNROLL-NO-VF-NEXT:  bb:
5362; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
5363; UNROLL-NO-VF-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
5364; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
5365; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5366; UNROLL-NO-VF:       vector.ph:
5367; UNROLL-NO-VF-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
5368; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
5369; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
5370; UNROLL-NO-VF-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
5371; UNROLL-NO-VF-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
5372; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
5373; UNROLL-NO-VF:       vector.body:
5374; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
5375; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[PRED_STORE_CONTINUE10]] ]
5376; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[PRED_STORE_CONTINUE10]] ]
5377; UNROLL-NO-VF-NEXT:    [[VEC_PHI5:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE10]] ]
5378; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
5379; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0
5380; UNROLL-NO-VF-NEXT:    [[INDUCTION2:%.*]] = add i32 [[OFFSET_IDX]], -1
5381; UNROLL-NO-VF-NEXT:    [[VEC_IV:%.*]] = add i32 [[INDEX]], 0
5382; UNROLL-NO-VF-NEXT:    [[VEC_IV6:%.*]] = add i32 [[INDEX]], 1
5383; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]]
5384; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[VEC_IV6]], [[TRIP_COUNT_MINUS_1]]
5385; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
5386; UNROLL-NO-VF:       pred.udiv.if:
5387; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = udiv i32 219220132, [[INDUCTION]]
5388; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE]]
5389; UNROLL-NO-VF:       pred.udiv.continue:
5390; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ]
5391; UNROLL-NO-VF-NEXT:    br i1 [[TMP3]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
5392; UNROLL-NO-VF:       pred.udiv.if7:
5393; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION2]]
5394; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE8]]
5395; UNROLL-NO-VF:       pred.udiv.continue8:
5396; UNROLL-NO-VF-NEXT:    [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF7]] ]
5397; UNROLL-NO-VF-NEXT:    [[TMP8]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]]
5398; UNROLL-NO-VF-NEXT:    [[TMP9]] = add i32 [[VEC_PHI5]], [[TMP5]]
5399; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
5400; UNROLL-NO-VF:       pred.store.if:
5401; UNROLL-NO-VF-NEXT:    [[INDUCTION3:%.*]] = add i32 [[INDEX]], 0
5402; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDUCTION3]]
5403; UNROLL-NO-VF-NEXT:    store i32 [[INDUCTION]], i32* [[TMP10]], align 4
5404; UNROLL-NO-VF-NEXT:    br label [[PRED_STORE_CONTINUE]]
5405; UNROLL-NO-VF:       pred.store.continue:
5406; UNROLL-NO-VF-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]]
5407; UNROLL-NO-VF:       pred.store.if9:
5408; UNROLL-NO-VF-NEXT:    [[INDUCTION4:%.*]] = add i32 [[INDEX]], 1
5409; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[INDUCTION4]]
5410; UNROLL-NO-VF-NEXT:    store i32 [[INDUCTION2]], i32* [[TMP11]], align 4
5411; UNROLL-NO-VF-NEXT:    br label [[PRED_STORE_CONTINUE10]]
5412; UNROLL-NO-VF:       pred.store.continue10:
5413; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = select i1 [[TMP2]], i32 [[TMP8]], i32 [[VEC_PHI]]
5414; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI5]]
5415; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
5416; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5417; UNROLL-NO-VF-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF51]], !llvm.loop [[LOOP55:![0-9]+]]
5418; UNROLL-NO-VF:       middle.block:
5419; UNROLL-NO-VF-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]]
5420; UNROLL-NO-VF-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
5421; UNROLL-NO-VF:       scalar.ph:
5422; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
5423; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
5424; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
5425; UNROLL-NO-VF-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
5426; UNROLL-NO-VF-NEXT:    br label [[BB2:%.*]]
5427; UNROLL-NO-VF:       bb1:
5428; UNROLL-NO-VF-NEXT:    [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
5429; UNROLL-NO-VF-NEXT:    ret i32 [[VAR]]
5430; UNROLL-NO-VF:       bb2:
5431; UNROLL-NO-VF-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5432; UNROLL-NO-VF-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5433; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
5434; UNROLL-NO-VF-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
5435; UNROLL-NO-VF-NEXT:    [[G:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[IV]]
5436; UNROLL-NO-VF-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
5437; UNROLL-NO-VF-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
5438; UNROLL-NO-VF-NEXT:    store i32 [[VAR3]], i32* [[G]], align 4
5439; UNROLL-NO-VF-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
5440; UNROLL-NO-VF-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
5441; UNROLL-NO-VF-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
5442; UNROLL-NO-VF-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF53]], !llvm.loop [[LOOP56:![0-9]+]]
5443;
5444; SINK-AFTER-LABEL: @sink_into_replication_region_multiple(
5445; SINK-AFTER-NEXT:  bb:
5446; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
5447; SINK-AFTER-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
5448; SINK-AFTER-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
5449; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5450; SINK-AFTER:       vector.ph:
5451; SINK-AFTER-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
5452; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
5453; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
5454; SINK-AFTER-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
5455; SINK-AFTER-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
5456; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
5457; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
5458; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
5459; SINK-AFTER:       vector.body:
5460; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE15:%.*]] ]
5461; SINK-AFTER-NEXT:    [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[PRED_STORE_CONTINUE15]] ]
5462; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_STORE_CONTINUE15]] ]
5463; SINK-AFTER-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_STORE_CONTINUE15]] ]
5464; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
5465; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
5466; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
5467; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2
5468; SINK-AFTER-NEXT:    [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3
5469; SINK-AFTER-NEXT:    [[TMP6:%.*]] = icmp ule <4 x i32> [[VEC_IND2]], [[BROADCAST_SPLAT]]
5470; SINK-AFTER-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
5471; SINK-AFTER-NEXT:    br i1 [[TMP7]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
5472; SINK-AFTER:       pred.udiv.if:
5473; SINK-AFTER-NEXT:    [[TMP8:%.*]] = udiv i32 219220132, [[TMP2]]
5474; SINK-AFTER-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0
5475; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE]]
5476; SINK-AFTER:       pred.udiv.continue:
5477; SINK-AFTER-NEXT:    [[TMP10:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UDIV_IF]] ]
5478; SINK-AFTER-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
5479; SINK-AFTER-NEXT:    br i1 [[TMP11]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
5480; SINK-AFTER:       pred.udiv.if2:
5481; SINK-AFTER-NEXT:    [[TMP12:%.*]] = udiv i32 219220132, [[TMP3]]
5482; SINK-AFTER-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP12]], i32 1
5483; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE5]]
5484; SINK-AFTER:       pred.udiv.continue3:
5485; SINK-AFTER-NEXT:    [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF4]] ]
5486; SINK-AFTER-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
5487; SINK-AFTER-NEXT:    br i1 [[TMP15]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
5488; SINK-AFTER:       pred.udiv.if4:
5489; SINK-AFTER-NEXT:    [[TMP16:%.*]] = udiv i32 219220132, [[TMP4]]
5490; SINK-AFTER-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP16]], i32 2
5491; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE7]]
5492; SINK-AFTER:       pred.udiv.continue5:
5493; SINK-AFTER-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP17]], [[PRED_UDIV_IF6]] ]
5494; SINK-AFTER-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
5495; SINK-AFTER-NEXT:    br i1 [[TMP19]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
5496; SINK-AFTER:       pred.udiv.if6:
5497; SINK-AFTER-NEXT:    [[TMP20:%.*]] = udiv i32 219220132, [[TMP5]]
5498; SINK-AFTER-NEXT:    [[TMP21:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP20]], i32 3
5499; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE9]]
5500; SINK-AFTER:       pred.udiv.continue7:
5501; SINK-AFTER-NEXT:    [[TMP22]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP21]], [[PRED_UDIV_IF8]] ]
5502; SINK-AFTER-NEXT:    [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
5503; SINK-AFTER-NEXT:    [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]]
5504; SINK-AFTER-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
5505; SINK-AFTER-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
5506; SINK-AFTER:       pred.store.if:
5507; SINK-AFTER-NEXT:    [[TMP26:%.*]] = add i32 [[INDEX]], 0
5508; SINK-AFTER-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP26]]
5509; SINK-AFTER-NEXT:    store i32 [[TMP2]], i32* [[TMP27]], align 4
5510; SINK-AFTER-NEXT:    br label [[PRED_STORE_CONTINUE]]
5511; SINK-AFTER:       pred.store.continue:
5512; SINK-AFTER-NEXT:    [[TMP28:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
5513; SINK-AFTER-NEXT:    br i1 [[TMP28]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
5514; SINK-AFTER:       pred.store.if8:
5515; SINK-AFTER-NEXT:    [[TMP29:%.*]] = add i32 [[INDEX]], 1
5516; SINK-AFTER-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP29]]
5517; SINK-AFTER-NEXT:    store i32 [[TMP3]], i32* [[TMP30]], align 4
5518; SINK-AFTER-NEXT:    br label [[PRED_STORE_CONTINUE11]]
5519; SINK-AFTER:       pred.store.continue9:
5520; SINK-AFTER-NEXT:    [[TMP31:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
5521; SINK-AFTER-NEXT:    br i1 [[TMP31]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
5522; SINK-AFTER:       pred.store.if10:
5523; SINK-AFTER-NEXT:    [[TMP32:%.*]] = add i32 [[INDEX]], 2
5524; SINK-AFTER-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP32]]
5525; SINK-AFTER-NEXT:    store i32 [[TMP4]], i32* [[TMP33]], align 4
5526; SINK-AFTER-NEXT:    br label [[PRED_STORE_CONTINUE13]]
5527; SINK-AFTER:       pred.store.continue11:
5528; SINK-AFTER-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
5529; SINK-AFTER-NEXT:    br i1 [[TMP34]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15]]
5530; SINK-AFTER:       pred.store.if12:
5531; SINK-AFTER-NEXT:    [[TMP35:%.*]] = add i32 [[INDEX]], 3
5532; SINK-AFTER-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP35]]
5533; SINK-AFTER-NEXT:    store i32 [[TMP5]], i32* [[TMP36]], align 4
5534; SINK-AFTER-NEXT:    br label [[PRED_STORE_CONTINUE15]]
5535; SINK-AFTER:       pred.store.continue13:
5536; SINK-AFTER-NEXT:    [[TMP37:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
5537; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
5538; SINK-AFTER-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
5539; SINK-AFTER-NEXT:    [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5540; SINK-AFTER-NEXT:    br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF52]], !llvm.loop [[LOOP56:![0-9]+]]
5541; SINK-AFTER:       middle.block:
5542; SINK-AFTER-NEXT:    [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP37]])
5543; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3
5544; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP22]], i32 2
5545; SINK-AFTER-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
5546; SINK-AFTER:       scalar.ph:
5547; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5548; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
5549; SINK-AFTER-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
5550; SINK-AFTER-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]
5551; SINK-AFTER-NEXT:    br label [[BB2:%.*]]
5552; SINK-AFTER:       bb1:
5553; SINK-AFTER-NEXT:    [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]
5554; SINK-AFTER-NEXT:    ret i32 [[VAR]]
5555; SINK-AFTER:       bb2:
5556; SINK-AFTER-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5557; SINK-AFTER-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5558; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
5559; SINK-AFTER-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
5560; SINK-AFTER-NEXT:    [[G:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[IV]]
5561; SINK-AFTER-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
5562; SINK-AFTER-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
5563; SINK-AFTER-NEXT:    store i32 [[VAR3]], i32* [[G]], align 4
5564; SINK-AFTER-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
5565; SINK-AFTER-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
5566; SINK-AFTER-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
5567; SINK-AFTER-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF54]], !llvm.loop [[LOOP57:![0-9]+]]
5568;
5569bb:
5570  br label %bb2
5571
5572  bb1:                                              ; preds = %bb2
5573  %var = phi i32 [ %var6, %bb2 ]
5574  ret i32 %var
5575
5576  bb2:                                              ; preds = %bb2, %bb
5577  %var3 = phi i32 [ %var8, %bb2 ], [ %y, %bb ]
5578  %iv = phi i32 [ %iv.next, %bb2 ], [ 0, %bb ]
5579  %var4 = phi i32 [ %var7, %bb2 ], [ 0, %bb ]
5580  %var5 = phi i32 [ %var6, %bb2 ], [ 0, %bb ]
5581  %g = getelementptr inbounds i32, i32* %x, i32 %iv
5582  %var6 = add i32 %var5, %var4
5583  %var7 = udiv i32 219220132, %var3
5584  store i32 %var3, i32* %g, align 4
5585  %var8 = add nsw i32 %var3, -1
5586  %iv.next = add nsw i32 %iv, 1
5587  %var9 = icmp slt i32 %var3, 2
5588  br i1 %var9, label %bb1, label %bb2, !prof !2
5589}
5590
5591; %vec.dead will be marked as a dead instruction in the vector loop and no
5592; recipe will be created for it. Make sure a valid sink target is used.
5593define void @sink_after_dead_inst(i32* %A.ptr) {
5594; CHECK-LABEL: @sink_after_dead_inst(
5595; CHECK-NEXT:  entry:
5596; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5597; CHECK:       vector.ph:
5598; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
5599; CHECK:       vector.body:
5600; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5601; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = zext i32 [[INDEX]] to i64
5602; CHECK-NEXT:    [[SEXT:%.*]] = shl i64 [[OFFSET_IDX]], 48
5603; CHECK-NEXT:    [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 48
5604; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i64 [[TMP0]]
5605; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
5606; CHECK-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP2]], align 4
5607; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
5608; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
5609; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]]
5610; CHECK:       middle.block:
5611; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
5612; CHECK:       scalar.ph:
5613; CHECK-NEXT:    br label [[LOOP:%.*]]
5614; CHECK:       loop:
5615; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP59:![0-9]+]]
5616; CHECK:       for.end:
5617; CHECK-NEXT:    ret void
5618;
5619; UNROLL-LABEL: @sink_after_dead_inst(
5620; UNROLL-NEXT:  entry:
5621; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5622; UNROLL:       vector.ph:
5623; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
5624; UNROLL:       vector.body:
5625; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5626; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = zext i32 [[INDEX]] to i64
5627; UNROLL-NEXT:    [[SEXT:%.*]] = shl i64 [[OFFSET_IDX]], 48
5628; UNROLL-NEXT:    [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 48
5629; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i64 [[TMP0]]
5630; UNROLL-NEXT:    [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
5631; UNROLL-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP2]], align 4
5632; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP1]], i64 4
5633; UNROLL-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
5634; UNROLL-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP4]], align 4
5635; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
5636; UNROLL-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
5637; UNROLL-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]]
5638; UNROLL:       middle.block:
5639; UNROLL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
5640; UNROLL:       scalar.ph:
5641; UNROLL-NEXT:    br label [[LOOP:%.*]]
5642; UNROLL:       loop:
5643; UNROLL-NEXT:    br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP59:![0-9]+]]
5644; UNROLL:       for.end:
5645; UNROLL-NEXT:    ret void
5646;
5647; UNROLL-NO-IC-LABEL: @sink_after_dead_inst(
5648; UNROLL-NO-IC-NEXT:  entry:
5649; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5650; UNROLL-NO-IC:       vector.ph:
5651; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
5652; UNROLL-NO-IC:       vector.body:
5653; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5654; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5655; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
5656; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
5657; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
5658; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
5659; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i16 [[OFFSET_IDX]], 4
5660; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
5661; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = add <4 x i16> [[STEP_ADD]], <i16 1, i16 1, i16 1, i16 1>
5662; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = or <4 x i16> [[TMP8]], [[TMP8]]
5663; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = or <4 x i16> [[TMP9]], [[TMP9]]
5664; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = zext <4 x i16> [[TMP10]] to <4 x i32>
5665; UNROLL-NO-IC-NEXT:    [[TMP13]] = zext <4 x i16> [[TMP11]] to <4 x i32>
5666; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP12]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
5667; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = shufflevector <4 x i32> [[TMP12]], <4 x i32> [[TMP13]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
5668; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[TMP0]]
5669; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[TMP4]]
5670; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = getelementptr i32, i32* [[TMP24]], i32 0
5671; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = bitcast i32* [[TMP26]] to <4 x i32>*
5672; UNROLL-NO-IC-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP27]], align 4
5673; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = getelementptr i32, i32* [[TMP24]], i32 4
5674; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = bitcast i32* [[TMP28]] to <4 x i32>*
5675; UNROLL-NO-IC-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP29]], align 4
5676; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
5677; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
5678; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
5679; UNROLL-NO-IC-NEXT:    br i1 [[TMP30]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]]
5680; UNROLL-NO-IC:       middle.block:
5681; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 16, 16
5682; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP13]], i32 3
5683; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP13]], i32 2
5684; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5685; UNROLL-NO-IC:       scalar.ph:
5686; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5687; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5688; UNROLL-NO-IC-NEXT:    br label [[LOOP:%.*]]
5689; UNROLL-NO-IC:       loop:
5690; UNROLL-NO-IC-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
5691; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ]
5692; UNROLL-NO-IC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR]], 15
5693; UNROLL-NO-IC-NEXT:    [[C:%.*]] = icmp eq i1 [[CMP]], true
5694; UNROLL-NO-IC-NEXT:    [[VEC_DEAD:%.*]] = and i1 [[C]], true
5695; UNROLL-NO-IC-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
5696; UNROLL-NO-IC-NEXT:    [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]]
5697; UNROLL-NO-IC-NEXT:    [[B3:%.*]] = and i1 [[CMP]], [[C]]
5698; UNROLL-NO-IC-NEXT:    [[FOR_PREV]] = zext i16 [[B1]] to i32
5699; UNROLL-NO-IC-NEXT:    [[EXT:%.*]] = zext i1 [[B3]] to i32
5700; UNROLL-NO-IC-NEXT:    [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]]
5701; UNROLL-NO-IC-NEXT:    store i32 0, i32* [[A_GEP]], align 4
5702; UNROLL-NO-IC-NEXT:    br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP59:![0-9]+]]
5703; UNROLL-NO-IC:       for.end:
5704; UNROLL-NO-IC-NEXT:    ret void
5705;
5706; UNROLL-NO-VF-LABEL: @sink_after_dead_inst(
5707; UNROLL-NO-VF-NEXT:  entry:
5708; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5709; UNROLL-NO-VF:       vector.ph:
5710; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
5711; UNROLL-NO-VF:       vector.body:
5712; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5713; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
5714; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
5715; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i16 [[OFFSET_IDX]], 0
5716; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i16 [[OFFSET_IDX]], 1
5717; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i16 [[INDUCTION]], 1
5718; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i16 [[INDUCTION1]], 1
5719; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = or i16 [[TMP0]], [[TMP0]]
5720; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = or i16 [[TMP1]], [[TMP1]]
5721; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP2]] to i32
5722; UNROLL-NO-VF-NEXT:    [[TMP5]] = zext i16 [[TMP3]] to i32
5723; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[INDUCTION]]
5724; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[INDUCTION1]]
5725; UNROLL-NO-VF-NEXT:    store i32 0, i32* [[TMP14]], align 4
5726; UNROLL-NO-VF-NEXT:    store i32 0, i32* [[TMP15]], align 4
5727; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
5728; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
5729; UNROLL-NO-VF-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP57:![0-9]+]]
5730; UNROLL-NO-VF:       middle.block:
5731; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 16, 16
5732; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5733; UNROLL-NO-VF:       scalar.ph:
5734; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
5735; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5736; UNROLL-NO-VF-NEXT:    br label [[LOOP:%.*]]
5737; UNROLL-NO-VF:       loop:
5738; UNROLL-NO-VF-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
5739; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ]
5740; UNROLL-NO-VF-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR]], 15
5741; UNROLL-NO-VF-NEXT:    [[C:%.*]] = icmp eq i1 [[CMP]], true
5742; UNROLL-NO-VF-NEXT:    [[VEC_DEAD:%.*]] = and i1 [[C]], true
5743; UNROLL-NO-VF-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
5744; UNROLL-NO-VF-NEXT:    [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]]
5745; UNROLL-NO-VF-NEXT:    [[B3:%.*]] = and i1 [[CMP]], [[C]]
5746; UNROLL-NO-VF-NEXT:    [[FOR_PREV]] = zext i16 [[B1]] to i32
5747; UNROLL-NO-VF-NEXT:    [[EXT:%.*]] = zext i1 [[B3]] to i32
5748; UNROLL-NO-VF-NEXT:    [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]]
5749; UNROLL-NO-VF-NEXT:    store i32 0, i32* [[A_GEP]], align 4
5750; UNROLL-NO-VF-NEXT:    br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP58:![0-9]+]]
5751; UNROLL-NO-VF:       for.end:
5752; UNROLL-NO-VF-NEXT:    ret void
5753;
5754; SINK-AFTER-LABEL: @sink_after_dead_inst(
5755; SINK-AFTER-NEXT:  entry:
5756; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5757; SINK-AFTER:       vector.ph:
5758; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
5759; SINK-AFTER:       vector.body:
5760; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5761; SINK-AFTER-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5762; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
5763; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
5764; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
5765; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
5766; SINK-AFTER-NEXT:    [[TMP5:%.*]] = or <4 x i16> [[TMP4]], [[TMP4]]
5767; SINK-AFTER-NEXT:    [[TMP6]] = zext <4 x i16> [[TMP5]] to <4 x i32>
5768; SINK-AFTER-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP6]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
5769; SINK-AFTER-NEXT:    [[TMP12:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[TMP0]]
5770; SINK-AFTER-NEXT:    [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 0
5771; SINK-AFTER-NEXT:    [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
5772; SINK-AFTER-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP14]], align 4
5773; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
5774; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
5775; SINK-AFTER-NEXT:    [[TMP15:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
5776; SINK-AFTER-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP58:![0-9]+]]
5777; SINK-AFTER:       middle.block:
5778; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i32 16, 16
5779; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP6]], i32 3
5780; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP6]], i32 2
5781; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5782; SINK-AFTER:       scalar.ph:
5783; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5784; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5785; SINK-AFTER-NEXT:    br label [[LOOP:%.*]]
5786; SINK-AFTER:       loop:
5787; SINK-AFTER-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
5788; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ]
5789; SINK-AFTER-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR]], 15
5790; SINK-AFTER-NEXT:    [[C:%.*]] = icmp eq i1 [[CMP]], true
5791; SINK-AFTER-NEXT:    [[VEC_DEAD:%.*]] = and i1 [[C]], true
5792; SINK-AFTER-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
5793; SINK-AFTER-NEXT:    [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]]
5794; SINK-AFTER-NEXT:    [[B3:%.*]] = and i1 [[CMP]], [[C]]
5795; SINK-AFTER-NEXT:    [[FOR_PREV]] = zext i16 [[B1]] to i32
5796; SINK-AFTER-NEXT:    [[EXT:%.*]] = zext i1 [[B3]] to i32
5797; SINK-AFTER-NEXT:    [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]]
5798; SINK-AFTER-NEXT:    store i32 0, i32* [[A_GEP]], align 4
5799; SINK-AFTER-NEXT:    br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP59:![0-9]+]]
5800; SINK-AFTER:       for.end:
5801; SINK-AFTER-NEXT:    ret void
5802entry:
5803  br label %loop
5804
5805loop:
5806  %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ]
5807  %for = phi i32 [ 0, %entry ], [ %for.prev, %loop ]
5808  %cmp = icmp eq i32 %for, 15
5809  %C = icmp eq i1 %cmp, true
5810  %vec.dead = and i1 %C, 1
5811  %iv.next = add i16 %iv, 1
5812  %B1 = or i16 %iv.next, %iv.next
5813  %B3 = and i1 %cmp, %C
5814  %for.prev = zext i16 %B1 to i32
5815
5816  %ext = zext i1 %B3 to i32
5817  %A.gep = getelementptr i32, i32* %A.ptr, i16 %iv
5818  store i32 0, i32* %A.gep
5819  br i1 %vec.dead, label %for.end, label %loop
5820
5821for.end:
5822  ret void
5823}
5824
5825!2 = !{!"branch_weights", i32 1, i32 1}  ; equal (1:1) branch weights; attached with `!prof !2` to a conditional branch earlier in this file
5826