1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -dce -instcombine -S | FileCheck %s
3; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -dce -instcombine -S | FileCheck %s --check-prefix=UNROLL
4; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=2 -S | FileCheck %s --check-prefix=UNROLL-NO-IC
5; RUN: opt < %s -loop-vectorize -force-vector-width=1 -force-vector-interleave=2 -S | FileCheck %s --check-prefix=UNROLL-NO-VF
6; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s --check-prefix=SINK-AFTER
7
8target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
9
10; void recurrence_1(int *a, int *b, int n) {
11;   for(int i = 0; i < n; i++)
12;     b[i] =  a[i] + a[i - 1]
13; }
14;
15;
16;
17define void @recurrence_1(i32* readonly noalias %a, i32* noalias %b, i32 %n) {
18; CHECK-LABEL: @recurrence_1(
19; CHECK-NEXT:  entry:
20; CHECK-NEXT:    br label [[FOR_PREHEADER:%.*]]
21; CHECK:       for.preheader:
22; CHECK-NEXT:    [[PRE_LOAD:%.*]] = load i32, i32* [[A:%.*]], align 4
23; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
24; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
25; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
26; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
27; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
28; CHECK:       vector.ph:
29; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
30; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i64 3
31; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
32; CHECK:       vector.body:
33; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
34; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
35; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 1
36; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
37; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
38; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4
39; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
40; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
41; CHECK-NEXT:    [[TMP8:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP6]]
42; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
43; CHECK-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* [[TMP9]], align 4
44; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
45; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
46; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
47; CHECK:       middle.block:
48; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
49; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3
50; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
51; CHECK:       scalar.ph:
52; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
53; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
54; CHECK-NEXT:    br label [[SCALAR_BODY:%.*]]
55; CHECK:       scalar.body:
56; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP11:%.*]], [[SCALAR_BODY]] ]
57; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
58; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
59; CHECK-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
60; CHECK-NEXT:    [[TMP11]] = load i32, i32* [[ARRAYIDX32]], align 4
61; CHECK-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
62; CHECK-NEXT:    [[ADD35:%.*]] = add i32 [[TMP11]], [[SCALAR_RECUR]]
63; CHECK-NEXT:    store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
64; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
65; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
66; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
67; CHECK:       for.exit:
68; CHECK-NEXT:    ret void
69;
70; UNROLL-LABEL: @recurrence_1(
71; UNROLL-NEXT:  entry:
72; UNROLL-NEXT:    br label [[FOR_PREHEADER:%.*]]
73; UNROLL:       for.preheader:
74; UNROLL-NEXT:    [[PRE_LOAD:%.*]] = load i32, i32* [[A:%.*]], align 4
75; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
76; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
77; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
78; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
79; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
80; UNROLL:       vector.ph:
81; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
82; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i64 3
83; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
84; UNROLL:       vector.body:
85; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
86; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
87; UNROLL-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 1
88; UNROLL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
89; UNROLL-NEXT:    [[TMP5:%.*]] = bitcast i32* [[TMP4]] to <4 x i32>*
90; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP5]], align 4
91; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i64 4
92; UNROLL-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
93; UNROLL-NEXT:    [[WIDE_LOAD1]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
94; UNROLL-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
95; UNROLL-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
96; UNROLL-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
97; UNROLL-NEXT:    [[TMP11:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP8]]
98; UNROLL-NEXT:    [[TMP12:%.*]] = add <4 x i32> [[WIDE_LOAD1]], [[TMP9]]
99; UNROLL-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
100; UNROLL-NEXT:    store <4 x i32> [[TMP11]], <4 x i32>* [[TMP13]], align 4
101; UNROLL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i64 4
102; UNROLL-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>*
103; UNROLL-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* [[TMP15]], align 4
104; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
105; UNROLL-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
106; UNROLL-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
107; UNROLL:       middle.block:
108; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
109; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD1]], i64 3
110; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
111; UNROLL:       scalar.ph:
112; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
113; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
114; UNROLL-NEXT:    br label [[SCALAR_BODY:%.*]]
115; UNROLL:       scalar.body:
116; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[SCALAR_BODY]] ]
117; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
118; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
119; UNROLL-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
120; UNROLL-NEXT:    [[TMP17]] = load i32, i32* [[ARRAYIDX32]], align 4
121; UNROLL-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
122; UNROLL-NEXT:    [[ADD35:%.*]] = add i32 [[TMP17]], [[SCALAR_RECUR]]
123; UNROLL-NEXT:    store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
124; UNROLL-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
125; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
126; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
127; UNROLL:       for.exit:
128; UNROLL-NEXT:    ret void
129;
130; UNROLL-NO-IC-LABEL: @recurrence_1(
131; UNROLL-NO-IC-NEXT:  entry:
132; UNROLL-NO-IC-NEXT:    br label [[FOR_PREHEADER:%.*]]
133; UNROLL-NO-IC:       for.preheader:
134; UNROLL-NO-IC-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
135; UNROLL-NO-IC-NEXT:    [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
136; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
137; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
138; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
139; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
140; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
141; UNROLL-NO-IC:       vector.ph:
142; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
143; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
144; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i32 3
145; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
146; UNROLL-NO-IC:       vector.body:
147; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
148; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
149; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
150; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
151; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add nuw nsw i64 [[TMP3]], 1
152; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add nuw nsw i64 [[TMP4]], 1
153; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]]
154; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
155; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 0
156; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
157; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
158; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i32 4
159; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
160; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i32>, <4 x i32>* [[TMP12]], align 4
161; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
162; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
163; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]]
164; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]]
165; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP13]]
166; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = add <4 x i32> [[WIDE_LOAD1]], [[TMP14]]
167; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 0
168; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = bitcast i32* [[TMP19]] to <4 x i32>*
169; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP17]], <4 x i32>* [[TMP20]], align 4
170; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i32 4
171; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
172; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP18]], <4 x i32>* [[TMP22]], align 4
173; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
174; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
175; UNROLL-NO-IC-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
176; UNROLL-NO-IC:       middle.block:
177; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
178; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD1]], i32 3
179; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD1]], i32 2
180; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
181; UNROLL-NO-IC:       scalar.ph:
182; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
183; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
184; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
185; UNROLL-NO-IC:       scalar.body:
186; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP24:%.*]], [[SCALAR_BODY]] ]
187; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
188; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
189; UNROLL-NO-IC-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
190; UNROLL-NO-IC-NEXT:    [[TMP24]] = load i32, i32* [[ARRAYIDX32]], align 4
191; UNROLL-NO-IC-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
192; UNROLL-NO-IC-NEXT:    [[ADD35:%.*]] = add i32 [[TMP24]], [[SCALAR_RECUR]]
193; UNROLL-NO-IC-NEXT:    store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
194; UNROLL-NO-IC-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
195; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
196; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
197; UNROLL-NO-IC:       for.exit:
198; UNROLL-NO-IC-NEXT:    ret void
199;
200; UNROLL-NO-VF-LABEL: @recurrence_1(
201; UNROLL-NO-VF-NEXT:  entry:
202; UNROLL-NO-VF-NEXT:    br label [[FOR_PREHEADER:%.*]]
203; UNROLL-NO-VF:       for.preheader:
204; UNROLL-NO-VF-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
205; UNROLL-NO-VF-NEXT:    [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
206; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
207; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
208; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
209; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
210; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
211; UNROLL-NO-VF:       vector.ph:
212; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
213; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
214; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
215; UNROLL-NO-VF:       vector.body:
216; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
217; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
218; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
219; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
220; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[INDUCTION]], 1
221; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[INDUCTION1]], 1
222; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
223; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
224; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
225; UNROLL-NO-VF-NEXT:    [[TMP8]] = load i32, i32* [[TMP6]], align 4
226; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDUCTION]]
227; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION1]]
228; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = add i32 [[TMP7]], [[VECTOR_RECUR]]
229; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = add i32 [[TMP8]], [[TMP7]]
230; UNROLL-NO-VF-NEXT:    store i32 [[TMP11]], i32* [[TMP9]], align 4
231; UNROLL-NO-VF-NEXT:    store i32 [[TMP12]], i32* [[TMP10]], align 4
232; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
233; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
234; UNROLL-NO-VF-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
235; UNROLL-NO-VF:       middle.block:
236; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
237; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
238; UNROLL-NO-VF:       scalar.ph:
239; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
240; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
241; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
242; UNROLL-NO-VF:       scalar.body:
243; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[SCALAR_BODY]] ]
244; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
245; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
246; UNROLL-NO-VF-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
247; UNROLL-NO-VF-NEXT:    [[TMP14]] = load i32, i32* [[ARRAYIDX32]], align 4
248; UNROLL-NO-VF-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
249; UNROLL-NO-VF-NEXT:    [[ADD35:%.*]] = add i32 [[TMP14]], [[SCALAR_RECUR]]
250; UNROLL-NO-VF-NEXT:    store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
251; UNROLL-NO-VF-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
252; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
253; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
254; UNROLL-NO-VF:       for.exit:
255; UNROLL-NO-VF-NEXT:    ret void
256;
257; SINK-AFTER-LABEL: @recurrence_1(
258; SINK-AFTER-NEXT:  entry:
259; SINK-AFTER-NEXT:    br label [[FOR_PREHEADER:%.*]]
260; SINK-AFTER:       for.preheader:
261; SINK-AFTER-NEXT:    [[ARRAYIDX_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 0
262; SINK-AFTER-NEXT:    [[PRE_LOAD:%.*]] = load i32, i32* [[ARRAYIDX_PHI_TRANS_INSERT]], align 4
263; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i32 [[N:%.*]], -1
264; SINK-AFTER-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
265; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
266; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
267; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
268; SINK-AFTER:       vector.ph:
269; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
270; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
271; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD]], i32 3
272; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
273; SINK-AFTER:       vector.body:
274; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
275; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
276; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
277; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
278; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
279; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
280; SINK-AFTER-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
281; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP7]], align 4
282; SINK-AFTER-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
283; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP3]]
284; SINK-AFTER-NEXT:    [[TMP10:%.*]] = add <4 x i32> [[WIDE_LOAD]], [[TMP8]]
285; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
286; SINK-AFTER-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
287; SINK-AFTER-NEXT:    store <4 x i32> [[TMP10]], <4 x i32>* [[TMP12]], align 4
288; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
289; SINK-AFTER-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
290; SINK-AFTER-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
291; SINK-AFTER:       middle.block:
292; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
293; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
294; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
295; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_EXIT:%.*]], label [[SCALAR_PH]]
296; SINK-AFTER:       scalar.ph:
297; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
298; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
299; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
300; SINK-AFTER:       scalar.body:
301; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[SCALAR_BODY]] ]
302; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
303; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
304; SINK-AFTER-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV_NEXT]]
305; SINK-AFTER-NEXT:    [[TMP14]] = load i32, i32* [[ARRAYIDX32]], align 4
306; SINK-AFTER-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
307; SINK-AFTER-NEXT:    [[ADD35:%.*]] = add i32 [[TMP14]], [[SCALAR_RECUR]]
308; SINK-AFTER-NEXT:    store i32 [[ADD35]], i32* [[ARRAYIDX34]], align 4
309; SINK-AFTER-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
310; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
311; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_EXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
312; SINK-AFTER:       for.exit:
313; SINK-AFTER-NEXT:    ret void
314;
315entry:
316  br label %for.preheader
317
318for.preheader:
319  %arrayidx.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 0
320  %pre_load = load i32, i32* %arrayidx.phi.trans.insert
321  br label %scalar.body
322
323scalar.body:
324  %0 = phi i32 [ %pre_load, %for.preheader ], [ %1, %scalar.body ]
325  %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
326  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
327  %arrayidx32 = getelementptr inbounds i32, i32* %a, i64 %indvars.iv.next
328  %1 = load i32, i32* %arrayidx32
329  %arrayidx34 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
330  %add35 = add i32 %1, %0
331  store i32 %add35, i32* %arrayidx34
332  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
333  %exitcond = icmp eq i32 %lftr.wideiv, %n
334  br i1 %exitcond, label %for.exit, label %scalar.body
335
336for.exit:
337  ret void
338}
339
340; int recurrence_2(int *a, int n) {
341;   int minmax;
342;   for (int i = 0; i < n; ++i)
343;     minmax = min(minmax, max(a[i] - a[i-1], 0));
344;   return minmax;
345; }
346;
347;
348;
349define i32 @recurrence_2(i32* nocapture readonly %a, i32 %n) {
350; CHECK-LABEL: @recurrence_2(
351; CHECK-NEXT:  entry:
352; CHECK-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
353; CHECK-NEXT:    br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
354; CHECK:       for.preheader:
355; CHECK-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
356; CHECK-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
357; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
358; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
359; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
360; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
361; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
362; CHECK:       vector.ph:
363; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
364; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i64 3
365; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
366; CHECK:       vector.body:
367; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
368; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
369; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
370; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]]
371; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
372; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
373; CHECK-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
374; CHECK-NEXT:    [[TMP6:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP5]]
375; CHECK-NEXT:    [[TMP7:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP6]], <4 x i32> zeroinitializer)
376; CHECK-NEXT:    [[TMP8]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP7]])
377; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
378; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
379; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
380; CHECK:       middle.block:
381; CHECK-NEXT:    [[TMP10:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP8]])
382; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
383; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i64 3
384; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
385; CHECK:       scalar.ph:
386; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
387; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
388; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
389; CHECK-NEXT:    br label [[SCALAR_BODY:%.*]]
390; CHECK:       for.cond.cleanup.loopexit:
391; CHECK-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[TMP13:%.*]], [[SCALAR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ]
392; CHECK-NEXT:    br label [[FOR_COND_CLEANUP]]
393; CHECK:       for.cond.cleanup:
394; CHECK-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
395; CHECK-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
396; CHECK:       scalar.body:
397; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP11:%.*]], [[SCALAR_BODY]] ]
398; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
399; CHECK-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP13]], [[SCALAR_BODY]] ]
400; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
401; CHECK-NEXT:    [[TMP11]] = load i32, i32* [[ARRAYIDX]], align 4
402; CHECK-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP11]], [[SCALAR_RECUR]]
403; CHECK-NEXT:    [[TMP12:%.*]] = call i32 @llvm.smax.i32(i32 [[SUB3]], i32 0)
404; CHECK-NEXT:    [[TMP13]] = call i32 @llvm.smin.i32(i32 [[MINMAX_028]], i32 [[TMP12]])
405; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
406; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
407; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
408; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
409;
410; UNROLL-LABEL: @recurrence_2(
411; UNROLL-NEXT:  entry:
412; UNROLL-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
413; UNROLL-NEXT:    br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
414; UNROLL:       for.preheader:
415; UNROLL-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
416; UNROLL-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
417; UNROLL-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
418; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
419; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
420; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
421; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
422; UNROLL:       vector.ph:
423; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
424; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i64 3
425; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
426; UNROLL:       vector.body:
427; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
428; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD2:%.*]], [[VECTOR_BODY]] ]
429; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP13:%.*]], [[VECTOR_BODY]] ]
430; UNROLL-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
431; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDEX]]
432; UNROLL-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
433; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
434; UNROLL-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP3]], i64 4
435; UNROLL-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
436; UNROLL-NEXT:    [[WIDE_LOAD2]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
437; UNROLL-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
438; UNROLL-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
439; UNROLL-NEXT:    [[TMP9:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP7]]
440; UNROLL-NEXT:    [[TMP10:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD2]], [[TMP8]]
441; UNROLL-NEXT:    [[TMP11:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP9]], <4 x i32> zeroinitializer)
442; UNROLL-NEXT:    [[TMP12:%.*]] = call <4 x i32> @llvm.smax.v4i32(<4 x i32> [[TMP10]], <4 x i32> zeroinitializer)
443; UNROLL-NEXT:    [[TMP13]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI]], <4 x i32> [[TMP11]])
444; UNROLL-NEXT:    [[TMP14]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP12]])
445; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
446; UNROLL-NEXT:    [[TMP15:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
447; UNROLL-NEXT:    br i1 [[TMP15]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
448; UNROLL:       middle.block:
449; UNROLL-NEXT:    [[TMP16:%.*]] = call <4 x i32> @llvm.smin.v4i32(<4 x i32> [[TMP13]], <4 x i32> [[TMP14]])
450; UNROLL-NEXT:    [[TMP17:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP16]])
451; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
452; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i64 3
453; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
454; UNROLL:       scalar.ph:
455; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
456; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[FOR_PREHEADER]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
457; UNROLL-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
458; UNROLL-NEXT:    br label [[SCALAR_BODY:%.*]]
459; UNROLL:       for.cond.cleanup.loopexit:
460; UNROLL-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[TMP20:%.*]], [[SCALAR_BODY]] ], [ [[TMP17]], [[MIDDLE_BLOCK]] ]
461; UNROLL-NEXT:    br label [[FOR_COND_CLEANUP]]
462; UNROLL:       for.cond.cleanup:
463; UNROLL-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
464; UNROLL-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
465; UNROLL:       scalar.body:
466; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ]
467; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
468; UNROLL-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[TMP20]], [[SCALAR_BODY]] ]
469; UNROLL-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
470; UNROLL-NEXT:    [[TMP18]] = load i32, i32* [[ARRAYIDX]], align 4
471; UNROLL-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP18]], [[SCALAR_RECUR]]
472; UNROLL-NEXT:    [[TMP19:%.*]] = call i32 @llvm.smax.i32(i32 [[SUB3]], i32 0)
473; UNROLL-NEXT:    [[TMP20]] = call i32 @llvm.smin.i32(i32 [[MINMAX_028]], i32 [[TMP19]])
474; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
475; UNROLL-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
476; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
477; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
478;
479; UNROLL-NO-IC-LABEL: @recurrence_2(
480; UNROLL-NO-IC-NEXT:  entry:
481; UNROLL-NO-IC-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
482; UNROLL-NO-IC-NEXT:    br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
483; UNROLL-NO-IC:       for.preheader:
484; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
485; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
486; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
487; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
488; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
489; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
490; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
491; UNROLL-NO-IC:       vector.ph:
492; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
493; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
494; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
495; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
496; UNROLL-NO-IC:       vector.body:
497; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
498; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD2:%.*]], [[VECTOR_BODY]] ]
499; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ]
500; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
501; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
502; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
503; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
504; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
505; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 0
506; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
507; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4
508; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP5]], i32 4
509; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
510; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD2]] = load <4 x i32>, <4 x i32>* [[TMP10]], align 4
511; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
512; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i32> [[WIDE_LOAD]], <4 x i32> [[WIDE_LOAD2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
513; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP11]]
514; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD2]], [[TMP12]]
515; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = icmp sgt <4 x i32> [[TMP13]], zeroinitializer
516; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = icmp sgt <4 x i32> [[TMP14]], zeroinitializer
517; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = select <4 x i1> [[TMP15]], <4 x i32> [[TMP13]], <4 x i32> zeroinitializer
518; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = select <4 x i1> [[TMP16]], <4 x i32> [[TMP14]], <4 x i32> zeroinitializer
519; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP17]]
520; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = icmp slt <4 x i32> [[VEC_PHI1]], [[TMP18]]
521; UNROLL-NO-IC-NEXT:    [[TMP21]] = select <4 x i1> [[TMP19]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP17]]
522; UNROLL-NO-IC-NEXT:    [[TMP22]] = select <4 x i1> [[TMP20]], <4 x i32> [[VEC_PHI1]], <4 x i32> [[TMP18]]
523; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
524; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
525; UNROLL-NO-IC-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
526; UNROLL-NO-IC:       middle.block:
527; UNROLL-NO-IC-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP21]], [[TMP22]]
528; UNROLL-NO-IC-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP21]], <4 x i32> [[TMP22]]
529; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[RDX_MINMAX_SELECT]])
530; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
531; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 3
532; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD2]], i32 2
533; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
534; UNROLL-NO-IC:       scalar.ph:
535; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
536; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
537; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
538; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
539; UNROLL-NO-IC:       for.cond.cleanup.loopexit:
540; UNROLL-NO-IC-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP24]], [[MIDDLE_BLOCK]] ]
541; UNROLL-NO-IC-NEXT:    br label [[FOR_COND_CLEANUP]]
542; UNROLL-NO-IC:       for.cond.cleanup:
543; UNROLL-NO-IC-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
544; UNROLL-NO-IC-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
545; UNROLL-NO-IC:       scalar.body:
546; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP25:%.*]], [[SCALAR_BODY]] ]
547; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
548; UNROLL-NO-IC-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
549; UNROLL-NO-IC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
550; UNROLL-NO-IC-NEXT:    [[TMP25]] = load i32, i32* [[ARRAYIDX]], align 4
551; UNROLL-NO-IC-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP25]], [[SCALAR_RECUR]]
552; UNROLL-NO-IC-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
553; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
554; UNROLL-NO-IC-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
555; UNROLL-NO-IC-NEXT:    [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
556; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
557; UNROLL-NO-IC-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
558; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
559; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
560;
561; UNROLL-NO-VF-LABEL: @recurrence_2(
562; UNROLL-NO-VF-NEXT:  entry:
563; UNROLL-NO-VF-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
564; UNROLL-NO-VF-NEXT:    br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
565; UNROLL-NO-VF:       for.preheader:
566; UNROLL-NO-VF-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
567; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
568; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
569; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
570; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
571; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
572; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
573; UNROLL-NO-VF:       vector.ph:
574; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
575; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
576; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
577; UNROLL-NO-VF:       vector.body:
578; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
579; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
580; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
581; UNROLL-NO-VF-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ poison, [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
582; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
583; UNROLL-NO-VF-NEXT:    [[INDUCTION2:%.*]] = add i64 [[INDEX]], 1
584; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDUCTION]]
585; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDUCTION2]]
586; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = load i32, i32* [[TMP3]], align 4
587; UNROLL-NO-VF-NEXT:    [[TMP6]] = load i32, i32* [[TMP4]], align 4
588; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = sub nsw i32 [[TMP5]], [[VECTOR_RECUR]]
589; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sub nsw i32 [[TMP6]], [[TMP5]]
590; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = icmp sgt i32 [[TMP7]], 0
591; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = icmp sgt i32 [[TMP8]], 0
592; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = select i1 [[TMP9]], i32 [[TMP7]], i32 0
593; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = select i1 [[TMP10]], i32 [[TMP8]], i32 0
594; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = icmp slt i32 [[VEC_PHI]], [[TMP11]]
595; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp slt i32 [[VEC_PHI1]], [[TMP12]]
596; UNROLL-NO-VF-NEXT:    [[TMP15]] = select i1 [[TMP13]], i32 [[VEC_PHI]], i32 [[TMP11]]
597; UNROLL-NO-VF-NEXT:    [[TMP16]] = select i1 [[TMP14]], i32 [[VEC_PHI1]], i32 [[TMP12]]
598; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
599; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
600; UNROLL-NO-VF-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
601; UNROLL-NO-VF:       middle.block:
602; UNROLL-NO-VF-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt i32 [[TMP15]], [[TMP16]]
603; UNROLL-NO-VF-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select i1 [[RDX_MINMAX_CMP]], i32 [[TMP15]], i32 [[TMP16]]
604; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
605; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
606; UNROLL-NO-VF:       scalar.ph:
607; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
608; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
609; UNROLL-NO-VF-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
610; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
611; UNROLL-NO-VF:       for.cond.cleanup.loopexit:
612; UNROLL-NO-VF-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
613; UNROLL-NO-VF-NEXT:    br label [[FOR_COND_CLEANUP]]
614; UNROLL-NO-VF:       for.cond.cleanup:
615; UNROLL-NO-VF-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
616; UNROLL-NO-VF-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
617; UNROLL-NO-VF:       scalar.body:
618; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[SCALAR_BODY]] ]
619; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
620; UNROLL-NO-VF-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
621; UNROLL-NO-VF-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
622; UNROLL-NO-VF-NEXT:    [[TMP18]] = load i32, i32* [[ARRAYIDX]], align 4
623; UNROLL-NO-VF-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP18]], [[SCALAR_RECUR]]
624; UNROLL-NO-VF-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
625; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
626; UNROLL-NO-VF-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
627; UNROLL-NO-VF-NEXT:    [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
628; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
629; UNROLL-NO-VF-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
630; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
631; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
632;
633; SINK-AFTER-LABEL: @recurrence_2(
634; SINK-AFTER-NEXT:  entry:
635; SINK-AFTER-NEXT:    [[CMP27:%.*]] = icmp sgt i32 [[N:%.*]], 0
636; SINK-AFTER-NEXT:    br i1 [[CMP27]], label [[FOR_PREHEADER:%.*]], label [[FOR_COND_CLEANUP:%.*]]
637; SINK-AFTER:       for.preheader:
638; SINK-AFTER-NEXT:    [[ARRAYIDX2_PHI_TRANS_INSERT:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 -1
639; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[ARRAYIDX2_PHI_TRANS_INSERT]], align 4
640; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
641; SINK-AFTER-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
642; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
643; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
644; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
645; SINK-AFTER:       vector.ph:
646; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
647; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
648; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[DOTPRE]], i32 3
649; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
650; SINK-AFTER:       vector.body:
651; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
652; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
653; SINK-AFTER-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ poison, [[VECTOR_PH]] ], [ [[TMP12:%.*]], [[VECTOR_BODY]] ]
654; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 0
655; SINK-AFTER-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP3]]
656; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
657; SINK-AFTER-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
658; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
659; SINK-AFTER-NEXT:    [[TMP7:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
660; SINK-AFTER-NEXT:    [[TMP8:%.*]] = sub nsw <4 x i32> [[WIDE_LOAD]], [[TMP7]]
661; SINK-AFTER-NEXT:    [[TMP9:%.*]] = icmp sgt <4 x i32> [[TMP8]], zeroinitializer
662; SINK-AFTER-NEXT:    [[TMP10:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP8]], <4 x i32> zeroinitializer
663; SINK-AFTER-NEXT:    [[TMP11:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[TMP10]]
664; SINK-AFTER-NEXT:    [[TMP12]] = select <4 x i1> [[TMP11]], <4 x i32> [[VEC_PHI]], <4 x i32> [[TMP10]]
665; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
666; SINK-AFTER-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
667; SINK-AFTER-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
668; SINK-AFTER:       middle.block:
669; SINK-AFTER-NEXT:    [[TMP14:%.*]] = call i32 @llvm.vector.reduce.smin.v4i32(<4 x i32> [[TMP12]])
670; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
671; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 3
672; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[WIDE_LOAD]], i32 2
673; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP_LOOPEXIT:%.*]], label [[SCALAR_PH]]
674; SINK-AFTER:       scalar.ph:
675; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[DOTPRE]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
676; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_PREHEADER]] ]
677; SINK-AFTER-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ poison, [[FOR_PREHEADER]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
678; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
679; SINK-AFTER:       for.cond.cleanup.loopexit:
680; SINK-AFTER-NEXT:    [[MINMAX_0_COND_LCSSA:%.*]] = phi i32 [ [[MINMAX_0_COND:%.*]], [[SCALAR_BODY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
681; SINK-AFTER-NEXT:    br label [[FOR_COND_CLEANUP]]
682; SINK-AFTER:       for.cond.cleanup:
683; SINK-AFTER-NEXT:    [[MINMAX_0_LCSSA:%.*]] = phi i32 [ poison, [[ENTRY:%.*]] ], [ [[MINMAX_0_COND_LCSSA]], [[FOR_COND_CLEANUP_LOOPEXIT]] ]
684; SINK-AFTER-NEXT:    ret i32 [[MINMAX_0_LCSSA]]
685; SINK-AFTER:       scalar.body:
686; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[SCALAR_BODY]] ]
687; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
688; SINK-AFTER-NEXT:    [[MINMAX_028:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[MINMAX_0_COND]], [[SCALAR_BODY]] ]
689; SINK-AFTER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
690; SINK-AFTER-NEXT:    [[TMP15]] = load i32, i32* [[ARRAYIDX]], align 4
691; SINK-AFTER-NEXT:    [[SUB3:%.*]] = sub nsw i32 [[TMP15]], [[SCALAR_RECUR]]
692; SINK-AFTER-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[SUB3]], 0
693; SINK-AFTER-NEXT:    [[COND:%.*]] = select i1 [[CMP4]], i32 [[SUB3]], i32 0
694; SINK-AFTER-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[MINMAX_028]], [[COND]]
695; SINK-AFTER-NEXT:    [[MINMAX_0_COND]] = select i1 [[CMP5]], i32 [[MINMAX_028]], i32 [[COND]]
696; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
697; SINK-AFTER-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
698; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
699; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
700;
701entry:
702  %cmp27 = icmp sgt i32 %n, 0
703  br i1 %cmp27, label %for.preheader, label %for.cond.cleanup
704
705for.preheader:
706  %arrayidx2.phi.trans.insert = getelementptr inbounds i32, i32* %a, i64 -1
707  %.pre = load i32, i32* %arrayidx2.phi.trans.insert, align 4
708  br label %scalar.body
709
710for.cond.cleanup.loopexit:
711  %minmax.0.cond.lcssa = phi i32 [ %minmax.0.cond, %scalar.body ]
712  br label %for.cond.cleanup
713
714for.cond.cleanup:
715  %minmax.0.lcssa = phi i32 [ poison, %entry ], [ %minmax.0.cond.lcssa, %for.cond.cleanup.loopexit ]
716  ret i32 %minmax.0.lcssa
717
718scalar.body:
719  %0 = phi i32 [ %.pre, %for.preheader ], [ %1, %scalar.body ]
720  %indvars.iv = phi i64 [ 0, %for.preheader ], [ %indvars.iv.next, %scalar.body ]
721  %minmax.028 = phi i32 [ poison, %for.preheader ], [ %minmax.0.cond, %scalar.body ]
722  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %indvars.iv
723  %1 = load i32, i32* %arrayidx, align 4
724  %sub3 = sub nsw i32 %1, %0
725  %cmp4 = icmp sgt i32 %sub3, 0
726  %cond = select i1 %cmp4, i32 %sub3, i32 0
727  %cmp5 = icmp slt i32 %minmax.028, %cond
728  %minmax.0.cond = select i1 %cmp5, i32 %minmax.028, i32 %cond
729  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
730  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
731  %exitcond = icmp eq i32 %lftr.wideiv, %n
732  br i1 %exitcond, label %for.cond.cleanup.loopexit, label %scalar.body
733}
734
735; void recurrence_3(short *a, double *b, int n, float f, short p) {
736;   b[0] = (double)a[0] - f * (double)p;
737;   for (int i = 1; i < n; i++)
738;     b[i] = (double)a[i] - f * (double)a[i - 1];
739; }
740;
741; Check also that the casts were not moved needlessly.
742;
743;
744define void @recurrence_3(i16* readonly noalias %a, double* noalias %b, i32 %n, float %f, i16 %p) {
745; CHECK-LABEL: @recurrence_3(
746; CHECK-NEXT:  entry:
747; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
748; CHECK-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
749; CHECK-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
750; CHECK-NEXT:    [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
751; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
752; CHECK-NEXT:    [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
753; CHECK-NEXT:    store double [[SUB]], double* [[B:%.*]], align 8
754; CHECK-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
755; CHECK-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
756; CHECK:       for.preheader:
757; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -2
758; CHECK-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
759; CHECK-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
760; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 3
761; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
762; CHECK:       vector.ph:
763; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934588
764; CHECK-NEXT:    [[IND_END:%.*]] = or i64 [[N_VEC]], 1
765; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 3
766; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
767; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
768; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
769; CHECK:       vector.body:
770; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
771; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
772; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1
773; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[OFFSET_IDX]]
774; CHECK-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
775; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2
776; CHECK-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
777; CHECK-NEXT:    [[TMP7:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
778; CHECK-NEXT:    [[TMP8:%.*]] = sitofp <4 x i16> [[TMP6]] to <4 x double>
779; CHECK-NEXT:    [[TMP9:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT]], [[TMP8]]
780; CHECK-NEXT:    [[TMP10:%.*]] = fsub fast <4 x double> [[TMP7]], [[TMP9]]
781; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[OFFSET_IDX]]
782; CHECK-NEXT:    [[TMP12:%.*]] = bitcast double* [[TMP11]] to <4 x double>*
783; CHECK-NEXT:    store <4 x double> [[TMP10]], <4 x double>* [[TMP12]], align 8
784; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
785; CHECK-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
786; CHECK-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
787; CHECK:       middle.block:
788; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
789; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
790; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
791; CHECK:       scalar.ph:
792; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
793; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[FOR_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
794; CHECK-NEXT:    br label [[SCALAR_BODY:%.*]]
795; CHECK:       scalar.body:
796; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[SCALAR_BODY]] ]
797; CHECK-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
798; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
799; CHECK-NEXT:    [[TMP14]] = load i16, i16* [[ARRAYIDX5]], align 2
800; CHECK-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP14]] to double
801; CHECK-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
802; CHECK-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
803; CHECK-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
804; CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
805; CHECK-NEXT:    store double [[SUB13]], double* [[ARRAYIDX15]], align 8
806; CHECK-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
807; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
808; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
809; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
810; CHECK:       for.end.loopexit:
811; CHECK-NEXT:    br label [[FOR_END]]
812; CHECK:       for.end:
813; CHECK-NEXT:    ret void
814;
815; UNROLL-LABEL: @recurrence_3(
816; UNROLL-NEXT:  entry:
817; UNROLL-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
818; UNROLL-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
819; UNROLL-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
820; UNROLL-NEXT:    [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
821; UNROLL-NEXT:    [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
822; UNROLL-NEXT:    [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
823; UNROLL-NEXT:    store double [[SUB]], double* [[B:%.*]], align 8
824; UNROLL-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
825; UNROLL-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
826; UNROLL:       for.preheader:
827; UNROLL-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -2
828; UNROLL-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
829; UNROLL-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
830; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 7
831; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
832; UNROLL:       vector.ph:
833; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934584
834; UNROLL-NEXT:    [[IND_END:%.*]] = or i64 [[N_VEC]], 1
835; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i64 3
836; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
837; UNROLL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
838; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i64 0
839; UNROLL-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT2]], <4 x double> poison, <4 x i32> zeroinitializer
840; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
841; UNROLL:       vector.body:
842; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
843; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
844; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = or i64 [[INDEX]], 1
845; UNROLL-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[OFFSET_IDX]]
846; UNROLL-NEXT:    [[TMP5:%.*]] = bitcast i16* [[TMP4]] to <4 x i16>*
847; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP5]], align 2
848; UNROLL-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i64 4
849; UNROLL-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>*
850; UNROLL-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2
851; UNROLL-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
852; UNROLL-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
853; UNROLL-NEXT:    [[TMP10:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
854; UNROLL-NEXT:    [[TMP11:%.*]] = sitofp <4 x i16> [[WIDE_LOAD1]] to <4 x double>
855; UNROLL-NEXT:    [[TMP12:%.*]] = sitofp <4 x i16> [[TMP8]] to <4 x double>
856; UNROLL-NEXT:    [[TMP13:%.*]] = sitofp <4 x i16> [[TMP9]] to <4 x double>
857; UNROLL-NEXT:    [[TMP14:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT]], [[TMP12]]
858; UNROLL-NEXT:    [[TMP15:%.*]] = fmul fast <4 x double> [[BROADCAST_SPLAT3]], [[TMP13]]
859; UNROLL-NEXT:    [[TMP16:%.*]] = fsub fast <4 x double> [[TMP10]], [[TMP14]]
860; UNROLL-NEXT:    [[TMP17:%.*]] = fsub fast <4 x double> [[TMP11]], [[TMP15]]
861; UNROLL-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[OFFSET_IDX]]
862; UNROLL-NEXT:    [[TMP19:%.*]] = bitcast double* [[TMP18]] to <4 x double>*
863; UNROLL-NEXT:    store <4 x double> [[TMP16]], <4 x double>* [[TMP19]], align 8
864; UNROLL-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, double* [[TMP18]], i64 4
865; UNROLL-NEXT:    [[TMP21:%.*]] = bitcast double* [[TMP20]] to <4 x double>*
866; UNROLL-NEXT:    store <4 x double> [[TMP17]], <4 x double>* [[TMP21]], align 8
867; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
868; UNROLL-NEXT:    [[TMP22:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
869; UNROLL-NEXT:    br i1 [[TMP22]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
870; UNROLL:       middle.block:
871; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
872; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 3
873; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
874; UNROLL:       scalar.ph:
875; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
876; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 1, [[FOR_PREHEADER]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ]
877; UNROLL-NEXT:    br label [[SCALAR_BODY:%.*]]
878; UNROLL:       scalar.body:
879; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP23:%.*]], [[SCALAR_BODY]] ]
880; UNROLL-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ]
881; UNROLL-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
882; UNROLL-NEXT:    [[TMP23]] = load i16, i16* [[ARRAYIDX5]], align 2
883; UNROLL-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP23]] to double
884; UNROLL-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
885; UNROLL-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
886; UNROLL-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
887; UNROLL-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
888; UNROLL-NEXT:    store double [[SUB13]], double* [[ARRAYIDX15]], align 8
889; UNROLL-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
890; UNROLL-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
891; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
892; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
893; UNROLL:       for.end.loopexit:
894; UNROLL-NEXT:    br label [[FOR_END]]
895; UNROLL:       for.end:
896; UNROLL-NEXT:    ret void
897;
898; UNROLL-NO-IC-LABEL: @recurrence_3(
899; UNROLL-NO-IC-NEXT:  entry:
900; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
901; UNROLL-NO-IC-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
902; UNROLL-NO-IC-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
903; UNROLL-NO-IC-NEXT:    [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
904; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
905; UNROLL-NO-IC-NEXT:    [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
906; UNROLL-NO-IC-NEXT:    store double [[SUB]], double* [[B:%.*]], align 8
907; UNROLL-NO-IC-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
908; UNROLL-NO-IC-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
909; UNROLL-NO-IC:       for.preheader:
910; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -2
911; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
912; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
913; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 8
914; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
915; UNROLL-NO-IC:       vector.ph:
916; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 8
917; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
918; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = add i64 1, [[N_VEC]]
919; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
920; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0
921; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
922; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0
923; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT2]], <4 x double> poison, <4 x i32> zeroinitializer
924; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
925; UNROLL-NO-IC:       vector.body:
926; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
927; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
928; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
929; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
930; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
931; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]]
932; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP5]]
933; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 0
934; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <4 x i16>*
935; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP9]], align 2
936; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i16, i16* [[TMP6]], i32 4
937; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = bitcast i16* [[TMP10]] to <4 x i16>*
938; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP11]], align 2
939; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
940; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
941; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
942; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = sitofp <4 x i16> [[WIDE_LOAD1]] to <4 x double>
943; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = sitofp <4 x i16> [[TMP12]] to <4 x double>
944; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = sitofp <4 x i16> [[TMP13]] to <4 x double>
945; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = fmul fast <4 x double> [[TMP16]], [[BROADCAST_SPLAT]]
946; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = fmul fast <4 x double> [[TMP17]], [[BROADCAST_SPLAT3]]
947; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = fsub fast <4 x double> [[TMP14]], [[TMP18]]
948; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = fsub fast <4 x double> [[TMP15]], [[TMP19]]
949; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP4]]
950; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP5]]
951; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 0
952; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = bitcast double* [[TMP24]] to <4 x double>*
953; UNROLL-NO-IC-NEXT:    store <4 x double> [[TMP20]], <4 x double>* [[TMP25]], align 8
954; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 4
955; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = bitcast double* [[TMP26]] to <4 x double>*
956; UNROLL-NO-IC-NEXT:    store <4 x double> [[TMP21]], <4 x double>* [[TMP27]], align 8
957; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
958; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
959; UNROLL-NO-IC-NEXT:    br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
960; UNROLL-NO-IC:       middle.block:
961; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
962; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3
963; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 2
964; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
965; UNROLL-NO-IC:       scalar.ph:
966; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
967; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ]
968; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
969; UNROLL-NO-IC:       scalar.body:
970; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP29:%.*]], [[SCALAR_BODY]] ]
971; UNROLL-NO-IC-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
972; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
973; UNROLL-NO-IC-NEXT:    [[TMP29]] = load i16, i16* [[ARRAYIDX5]], align 2
974; UNROLL-NO-IC-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP29]] to double
975; UNROLL-NO-IC-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
976; UNROLL-NO-IC-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
977; UNROLL-NO-IC-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
978; UNROLL-NO-IC-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
979; UNROLL-NO-IC-NEXT:    store double [[SUB13]], double* [[ARRAYIDX15]], align 8
980; UNROLL-NO-IC-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
981; UNROLL-NO-IC-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
982; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
983; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
984; UNROLL-NO-IC:       for.end.loopexit:
985; UNROLL-NO-IC-NEXT:    br label [[FOR_END]]
986; UNROLL-NO-IC:       for.end:
987; UNROLL-NO-IC-NEXT:    ret void
988;
989; UNROLL-NO-VF-LABEL: @recurrence_3(
990; UNROLL-NO-VF-NEXT:  entry:
991; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
992; UNROLL-NO-VF-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
993; UNROLL-NO-VF-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
994; UNROLL-NO-VF-NEXT:    [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
995; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
996; UNROLL-NO-VF-NEXT:    [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
997; UNROLL-NO-VF-NEXT:    store double [[SUB]], double* [[B:%.*]], align 8
998; UNROLL-NO-VF-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
999; UNROLL-NO-VF-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
1000; UNROLL-NO-VF:       for.preheader:
1001; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -2
1002; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
1003; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1004; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 2
1005; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1006; UNROLL-NO-VF:       vector.ph:
1007; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 2
1008; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
1009; UNROLL-NO-VF-NEXT:    [[IND_END:%.*]] = add i64 1, [[N_VEC]]
1010; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
1011; UNROLL-NO-VF:       vector.body:
1012; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1013; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[TMP0]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
1014; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
1015; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
1016; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 1
1017; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION]]
1018; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDUCTION1]]
1019; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = load i16, i16* [[TMP4]], align 2
1020; UNROLL-NO-VF-NEXT:    [[TMP7]] = load i16, i16* [[TMP5]], align 2
1021; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sitofp i16 [[TMP6]] to double
1022; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = sitofp i16 [[TMP7]] to double
1023; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = sitofp i16 [[VECTOR_RECUR]] to double
1024; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = sitofp i16 [[TMP6]] to double
1025; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = fmul fast double [[TMP10]], [[CONV1]]
1026; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = fmul fast double [[TMP11]], [[CONV1]]
1027; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = fsub fast double [[TMP8]], [[TMP12]]
1028; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = fsub fast double [[TMP9]], [[TMP13]]
1029; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDUCTION]]
1030; UNROLL-NO-VF-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[INDUCTION1]]
1031; UNROLL-NO-VF-NEXT:    store double [[TMP14]], double* [[TMP16]], align 8
1032; UNROLL-NO-VF-NEXT:    store double [[TMP15]], double* [[TMP17]], align 8
1033; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1034; UNROLL-NO-VF-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1035; UNROLL-NO-VF-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
1036; UNROLL-NO-VF:       middle.block:
1037; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1038; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
1039; UNROLL-NO-VF:       scalar.ph:
1040; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
1041; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ]
1042; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
1043; UNROLL-NO-VF:       scalar.body:
1044; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP19:%.*]], [[SCALAR_BODY]] ]
1045; UNROLL-NO-VF-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1046; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
1047; UNROLL-NO-VF-NEXT:    [[TMP19]] = load i16, i16* [[ARRAYIDX5]], align 2
1048; UNROLL-NO-VF-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP19]] to double
1049; UNROLL-NO-VF-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
1050; UNROLL-NO-VF-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
1051; UNROLL-NO-VF-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
1052; UNROLL-NO-VF-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
1053; UNROLL-NO-VF-NEXT:    store double [[SUB13]], double* [[ARRAYIDX15]], align 8
1054; UNROLL-NO-VF-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
1055; UNROLL-NO-VF-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
1056; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
1057; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
1058; UNROLL-NO-VF:       for.end.loopexit:
1059; UNROLL-NO-VF-NEXT:    br label [[FOR_END]]
1060; UNROLL-NO-VF:       for.end:
1061; UNROLL-NO-VF-NEXT:    ret void
1062;
1063; SINK-AFTER-LABEL: @recurrence_3(
1064; SINK-AFTER-NEXT:  entry:
1065; SINK-AFTER-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1066; SINK-AFTER-NEXT:    [[CONV:%.*]] = sitofp i16 [[TMP0]] to double
1067; SINK-AFTER-NEXT:    [[CONV1:%.*]] = fpext float [[F:%.*]] to double
1068; SINK-AFTER-NEXT:    [[CONV2:%.*]] = sitofp i16 [[P:%.*]] to double
1069; SINK-AFTER-NEXT:    [[MUL:%.*]] = fmul fast double [[CONV2]], [[CONV1]]
1070; SINK-AFTER-NEXT:    [[SUB:%.*]] = fsub fast double [[CONV]], [[MUL]]
1071; SINK-AFTER-NEXT:    store double [[SUB]], double* [[B:%.*]], align 8
1072; SINK-AFTER-NEXT:    [[CMP25:%.*]] = icmp sgt i32 [[N:%.*]], 1
1073; SINK-AFTER-NEXT:    br i1 [[CMP25]], label [[FOR_PREHEADER:%.*]], label [[FOR_END:%.*]]
1074; SINK-AFTER:       for.preheader:
1075; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -2
1076; SINK-AFTER-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
1077; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1078; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP3]], 4
1079; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1080; SINK-AFTER:       vector.ph:
1081; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP3]], 4
1082; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP3]], [[N_MOD_VF]]
1083; SINK-AFTER-NEXT:    [[IND_END:%.*]] = add i64 1, [[N_VEC]]
1084; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[TMP0]], i32 3
1085; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x double> poison, double [[CONV1]], i32 0
1086; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x double> [[BROADCAST_SPLATINSERT]], <4 x double> poison, <4 x i32> zeroinitializer
1087; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
1088; SINK-AFTER:       vector.body:
1089; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1090; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
1091; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
1092; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 0
1093; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP4]]
1094; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP5]], i32 0
1095; SINK-AFTER-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>*
1096; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2
1097; SINK-AFTER-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1098; SINK-AFTER-NEXT:    [[TMP9:%.*]] = sitofp <4 x i16> [[WIDE_LOAD]] to <4 x double>
1099; SINK-AFTER-NEXT:    [[TMP10:%.*]] = sitofp <4 x i16> [[TMP8]] to <4 x double>
1100; SINK-AFTER-NEXT:    [[TMP11:%.*]] = fmul fast <4 x double> [[TMP10]], [[BROADCAST_SPLAT]]
1101; SINK-AFTER-NEXT:    [[TMP12:%.*]] = fsub fast <4 x double> [[TMP9]], [[TMP11]]
1102; SINK-AFTER-NEXT:    [[TMP13:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[TMP4]]
1103; SINK-AFTER-NEXT:    [[TMP14:%.*]] = getelementptr inbounds double, double* [[TMP13]], i32 0
1104; SINK-AFTER-NEXT:    [[TMP15:%.*]] = bitcast double* [[TMP14]] to <4 x double>*
1105; SINK-AFTER-NEXT:    store <4 x double> [[TMP12]], <4 x double>* [[TMP15]], align 8
1106; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1107; SINK-AFTER-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1108; SINK-AFTER-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
1109; SINK-AFTER:       middle.block:
1110; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1111; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
1112; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
1113; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
1114; SINK-AFTER:       scalar.ph:
1115; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[TMP0]], [[FOR_PREHEADER]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
1116; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1, [[FOR_PREHEADER]] ]
1117; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
1118; SINK-AFTER:       scalar.body:
1119; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[SCALAR_BODY]] ]
1120; SINK-AFTER-NEXT:    [[ADVARS_IV:%.*]] = phi i64 [ [[ADVARS_IV_NEXT:%.*]], [[SCALAR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1121; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[ADVARS_IV]]
1122; SINK-AFTER-NEXT:    [[TMP17]] = load i16, i16* [[ARRAYIDX5]], align 2
1123; SINK-AFTER-NEXT:    [[CONV6:%.*]] = sitofp i16 [[TMP17]] to double
1124; SINK-AFTER-NEXT:    [[CONV11:%.*]] = sitofp i16 [[SCALAR_RECUR]] to double
1125; SINK-AFTER-NEXT:    [[MUL12:%.*]] = fmul fast double [[CONV11]], [[CONV1]]
1126; SINK-AFTER-NEXT:    [[SUB13:%.*]] = fsub fast double [[CONV6]], [[MUL12]]
1127; SINK-AFTER-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds double, double* [[B]], i64 [[ADVARS_IV]]
1128; SINK-AFTER-NEXT:    store double [[SUB13]], double* [[ARRAYIDX15]], align 8
1129; SINK-AFTER-NEXT:    [[ADVARS_IV_NEXT]] = add nuw nsw i64 [[ADVARS_IV]], 1
1130; SINK-AFTER-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[ADVARS_IV_NEXT]] to i32
1131; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
1132; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[SCALAR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
1133; SINK-AFTER:       for.end.loopexit:
1134; SINK-AFTER-NEXT:    br label [[FOR_END]]
1135; SINK-AFTER:       for.end:
1136; SINK-AFTER-NEXT:    ret void
1137;
1138entry:
1139  %0 = load i16, i16* %a, align 2
1140  %conv = sitofp i16 %0 to double
1141  %conv1 = fpext float %f to double
1142  %conv2 = sitofp i16 %p to double
1143  %mul = fmul fast double %conv2, %conv1
1144  %sub = fsub fast double %conv, %mul
1145  store double %sub, double* %b, align 8
1146  %cmp25 = icmp sgt i32 %n, 1
1147  br i1 %cmp25, label %for.preheader, label %for.end
1148
1149for.preheader:
1150  br label %scalar.body
1151
1152scalar.body:
1153  %1 = phi i16 [ %0, %for.preheader ], [ %2, %scalar.body ]
1154  %advars.iv = phi i64 [ %advars.iv.next, %scalar.body ], [ 1, %for.preheader ]
1155  %arrayidx5 = getelementptr inbounds i16, i16* %a, i64 %advars.iv
1156  %2 = load i16, i16* %arrayidx5, align 2
1157  %conv6 = sitofp i16 %2 to double
1158  %conv11 = sitofp i16 %1 to double
1159  %mul12 = fmul fast double %conv11, %conv1
1160  %sub13 = fsub fast double %conv6, %mul12
1161  %arrayidx15 = getelementptr inbounds double, double* %b, i64 %advars.iv
1162  store double %sub13, double* %arrayidx15, align 8
1163  %advars.iv.next = add nuw nsw i64 %advars.iv, 1
1164  %lftr.wideiv = trunc i64 %advars.iv.next to i32
1165  %exitcond = icmp eq i32 %lftr.wideiv, %n
1166  br i1 %exitcond, label %for.end.loopexit, label %scalar.body
1167
1168for.end.loopexit:
1169  br label %for.end
1170
1171for.end:
1172  ret void
1173}
1174
1175; void PR26734(short *a, int *b, int *c, int d, short *e) {
1176;   for (; d != 21; d++) {
1177;     *b &= *c;
1178;     *e = *a - 6;
1179;     *c = *e;
1180;   }
1181; }
1182;
1183;
1184define void @PR26734(i16* %a, i32* %b, i32* %c, i32 %d, i16* %e) {
1185; CHECK-LABEL: @PR26734(
1186; CHECK-NEXT:  entry:
1187; CHECK-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
1188; CHECK-NEXT:    br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
1189; CHECK:       entry.for.end_crit_edge:
1190; CHECK-NEXT:    br label [[FOR_END:%.*]]
1191; CHECK:       for.body.lr.ph:
1192; CHECK-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1193; CHECK-NEXT:    [[SUB:%.*]] = add i16 [[TMP0]], -6
1194; CHECK-NEXT:    [[CONV2:%.*]] = sext i16 [[SUB]] to i32
1195; CHECK-NEXT:    [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
1196; CHECK-NEXT:    [[B_PROMOTED:%.*]] = load i32, i32* [[B:%.*]], align 4
1197; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1198; CHECK:       for.body:
1199; CHECK-NEXT:    [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
1200; CHECK-NEXT:    [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
1201; CHECK-NEXT:    [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
1202; CHECK-NEXT:    [[AND]] = and i32 [[AND6]], [[CONV25]]
1203; CHECK-NEXT:    [[INC]] = add nsw i32 [[INC7]], 1
1204; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC]], 21
1205; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
1206; CHECK:       for.cond.for.end_crit_edge:
1207; CHECK-NEXT:    store i32 [[CONV2]], i32* [[C]], align 4
1208; CHECK-NEXT:    store i32 [[AND]], i32* [[B]], align 4
1209; CHECK-NEXT:    store i16 [[SUB]], i16* [[E:%.*]], align 2
1210; CHECK-NEXT:    br label [[FOR_END]]
1211; CHECK:       for.end:
1212; CHECK-NEXT:    ret void
1213;
1214; UNROLL-LABEL: @PR26734(
1215; UNROLL-NEXT:  entry:
1216; UNROLL-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
1217; UNROLL-NEXT:    br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
1218; UNROLL:       entry.for.end_crit_edge:
1219; UNROLL-NEXT:    br label [[FOR_END:%.*]]
1220; UNROLL:       for.body.lr.ph:
1221; UNROLL-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1222; UNROLL-NEXT:    [[SUB:%.*]] = add i16 [[TMP0]], -6
1223; UNROLL-NEXT:    [[CONV2:%.*]] = sext i16 [[SUB]] to i32
1224; UNROLL-NEXT:    [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
1225; UNROLL-NEXT:    [[B_PROMOTED:%.*]] = load i32, i32* [[B:%.*]], align 4
1226; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
1227; UNROLL:       for.body:
1228; UNROLL-NEXT:    [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
1229; UNROLL-NEXT:    [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
1230; UNROLL-NEXT:    [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
1231; UNROLL-NEXT:    [[AND]] = and i32 [[AND6]], [[CONV25]]
1232; UNROLL-NEXT:    [[INC]] = add nsw i32 [[INC7]], 1
1233; UNROLL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC]], 21
1234; UNROLL-NEXT:    br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
1235; UNROLL:       for.cond.for.end_crit_edge:
1236; UNROLL-NEXT:    store i32 [[CONV2]], i32* [[C]], align 4
1237; UNROLL-NEXT:    store i32 [[AND]], i32* [[B]], align 4
1238; UNROLL-NEXT:    store i16 [[SUB]], i16* [[E:%.*]], align 2
1239; UNROLL-NEXT:    br label [[FOR_END]]
1240; UNROLL:       for.end:
1241; UNROLL-NEXT:    ret void
1242;
1243; UNROLL-NO-IC-LABEL: @PR26734(
1244; UNROLL-NO-IC-NEXT:  entry:
1245; UNROLL-NO-IC-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
1246; UNROLL-NO-IC-NEXT:    br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
1247; UNROLL-NO-IC:       entry.for.end_crit_edge:
1248; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[B:%.*]], align 4
1249; UNROLL-NO-IC-NEXT:    br label [[FOR_END:%.*]]
1250; UNROLL-NO-IC:       for.body.lr.ph:
1251; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1252; UNROLL-NO-IC-NEXT:    [[SUB:%.*]] = add i16 [[TMP0]], -6
1253; UNROLL-NO-IC-NEXT:    [[CONV2:%.*]] = sext i16 [[SUB]] to i32
1254; UNROLL-NO-IC-NEXT:    [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
1255; UNROLL-NO-IC-NEXT:    [[B_PROMOTED:%.*]] = load i32, i32* [[B]], align 4
1256; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
1257; UNROLL-NO-IC:       for.body:
1258; UNROLL-NO-IC-NEXT:    [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
1259; UNROLL-NO-IC-NEXT:    [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
1260; UNROLL-NO-IC-NEXT:    [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
1261; UNROLL-NO-IC-NEXT:    [[AND]] = and i32 [[AND6]], [[CONV25]]
1262; UNROLL-NO-IC-NEXT:    [[INC]] = add nsw i32 [[INC7]], 1
1263; UNROLL-NO-IC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC]], 21
1264; UNROLL-NO-IC-NEXT:    br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
1265; UNROLL-NO-IC:       for.cond.for.end_crit_edge:
1266; UNROLL-NO-IC-NEXT:    [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
1267; UNROLL-NO-IC-NEXT:    store i32 [[CONV2]], i32* [[C]], align 4
1268; UNROLL-NO-IC-NEXT:    store i32 [[AND_LCSSA]], i32* [[B]], align 4
1269; UNROLL-NO-IC-NEXT:    store i16 [[SUB]], i16* [[E:%.*]], align 2
1270; UNROLL-NO-IC-NEXT:    br label [[FOR_END]]
1271; UNROLL-NO-IC:       for.end:
1272; UNROLL-NO-IC-NEXT:    ret void
1273;
1274; UNROLL-NO-VF-LABEL: @PR26734(
1275; UNROLL-NO-VF-NEXT:  entry:
1276; UNROLL-NO-VF-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
1277; UNROLL-NO-VF-NEXT:    br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
1278; UNROLL-NO-VF:       entry.for.end_crit_edge:
1279; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[B:%.*]], align 4
1280; UNROLL-NO-VF-NEXT:    br label [[FOR_END:%.*]]
1281; UNROLL-NO-VF:       for.body.lr.ph:
1282; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1283; UNROLL-NO-VF-NEXT:    [[SUB:%.*]] = add i16 [[TMP0]], -6
1284; UNROLL-NO-VF-NEXT:    [[CONV2:%.*]] = sext i16 [[SUB]] to i32
1285; UNROLL-NO-VF-NEXT:    [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
1286; UNROLL-NO-VF-NEXT:    [[B_PROMOTED:%.*]] = load i32, i32* [[B]], align 4
1287; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
1288; UNROLL-NO-VF:       for.body:
1289; UNROLL-NO-VF-NEXT:    [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
1290; UNROLL-NO-VF-NEXT:    [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
1291; UNROLL-NO-VF-NEXT:    [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
1292; UNROLL-NO-VF-NEXT:    [[AND]] = and i32 [[AND6]], [[CONV25]]
1293; UNROLL-NO-VF-NEXT:    [[INC]] = add nsw i32 [[INC7]], 1
1294; UNROLL-NO-VF-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC]], 21
1295; UNROLL-NO-VF-NEXT:    br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
1296; UNROLL-NO-VF:       for.cond.for.end_crit_edge:
1297; UNROLL-NO-VF-NEXT:    [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
1298; UNROLL-NO-VF-NEXT:    store i32 [[CONV2]], i32* [[C]], align 4
1299; UNROLL-NO-VF-NEXT:    store i32 [[AND_LCSSA]], i32* [[B]], align 4
1300; UNROLL-NO-VF-NEXT:    store i16 [[SUB]], i16* [[E:%.*]], align 2
1301; UNROLL-NO-VF-NEXT:    br label [[FOR_END]]
1302; UNROLL-NO-VF:       for.end:
1303; UNROLL-NO-VF-NEXT:    ret void
1304;
1305; SINK-AFTER-LABEL: @PR26734(
1306; SINK-AFTER-NEXT:  entry:
1307; SINK-AFTER-NEXT:    [[CMP4:%.*]] = icmp eq i32 [[D:%.*]], 21
1308; SINK-AFTER-NEXT:    br i1 [[CMP4]], label [[ENTRY_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY_LR_PH:%.*]]
1309; SINK-AFTER:       entry.for.end_crit_edge:
1310; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i32, i32* [[B:%.*]], align 4
1311; SINK-AFTER-NEXT:    br label [[FOR_END:%.*]]
1312; SINK-AFTER:       for.body.lr.ph:
1313; SINK-AFTER-NEXT:    [[TMP0:%.*]] = load i16, i16* [[A:%.*]], align 2
1314; SINK-AFTER-NEXT:    [[SUB:%.*]] = add i16 [[TMP0]], -6
1315; SINK-AFTER-NEXT:    [[CONV2:%.*]] = sext i16 [[SUB]] to i32
1316; SINK-AFTER-NEXT:    [[C_PROMOTED:%.*]] = load i32, i32* [[C:%.*]], align 4
1317; SINK-AFTER-NEXT:    [[B_PROMOTED:%.*]] = load i32, i32* [[B]], align 4
1318; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
1319; SINK-AFTER:       for.body:
1320; SINK-AFTER-NEXT:    [[INC7:%.*]] = phi i32 [ [[D]], [[FOR_BODY_LR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
1321; SINK-AFTER-NEXT:    [[AND6:%.*]] = phi i32 [ [[B_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[AND:%.*]], [[FOR_BODY]] ]
1322; SINK-AFTER-NEXT:    [[CONV25:%.*]] = phi i32 [ [[C_PROMOTED]], [[FOR_BODY_LR_PH]] ], [ [[CONV2]], [[FOR_BODY]] ]
1323; SINK-AFTER-NEXT:    [[AND]] = and i32 [[AND6]], [[CONV25]]
1324; SINK-AFTER-NEXT:    [[INC]] = add nsw i32 [[INC7]], 1
1325; SINK-AFTER-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC]], 21
1326; SINK-AFTER-NEXT:    br i1 [[CMP]], label [[FOR_COND_FOR_END_CRIT_EDGE:%.*]], label [[FOR_BODY]]
1327; SINK-AFTER:       for.cond.for.end_crit_edge:
1328; SINK-AFTER-NEXT:    [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ]
1329; SINK-AFTER-NEXT:    store i32 [[CONV2]], i32* [[C]], align 4
1330; SINK-AFTER-NEXT:    store i32 [[AND_LCSSA]], i32* [[B]], align 4
1331; SINK-AFTER-NEXT:    store i16 [[SUB]], i16* [[E:%.*]], align 2
1332; SINK-AFTER-NEXT:    br label [[FOR_END]]
1333; SINK-AFTER:       for.end:
1334; SINK-AFTER-NEXT:    ret void
1335;
1336entry:
1337  %cmp4 = icmp eq i32 %d, 21
1338  br i1 %cmp4, label %entry.for.end_crit_edge, label %for.body.lr.ph
1339
1340entry.for.end_crit_edge:
1341  %.pre = load i32, i32* %b, align 4
1342  br label %for.end
1343
1344for.body.lr.ph:
1345  %0 = load i16, i16* %a, align 2
1346  %sub = add i16 %0, -6
1347  %conv2 = sext i16 %sub to i32
1348  %c.promoted = load i32, i32* %c, align 4
1349  %b.promoted = load i32, i32* %b, align 4
1350  br label %for.body
1351
1352for.body:
1353  %inc7 = phi i32 [ %d, %for.body.lr.ph ], [ %inc, %for.body ]
1354  %and6 = phi i32 [ %b.promoted, %for.body.lr.ph ], [ %and, %for.body ]
1355  %conv25 = phi i32 [ %c.promoted, %for.body.lr.ph ], [ %conv2, %for.body ]
1356  %and = and i32 %and6, %conv25
1357  %inc = add nsw i32 %inc7, 1
1358  %cmp = icmp eq i32 %inc, 21
1359  br i1 %cmp, label %for.cond.for.end_crit_edge, label %for.body
1360
1361for.cond.for.end_crit_edge:
1362  %and.lcssa = phi i32 [ %and, %for.body ]
1363  store i32 %conv2, i32* %c, align 4
1364  store i32 %and.lcssa, i32* %b, align 4
1365  store i16 %sub, i16* %e, align 2
1366  br label %for.end
1367
1368for.end:
1369  ret void
1370}
1371
1372; int PR27246() {
1373;   unsigned int e, n;
1374;   for (int i = 1; i < 49; ++i) {
1375;     for (int k = i; k > 1; --k)
1376;       e = k;
1377;     n = e;
1378;   }
1379;   return n;
1380; }
1381;
1382;
1383define i32 @PR27246() {
1384; CHECK-LABEL: @PR27246(
1385; CHECK-NEXT:  entry:
1386; CHECK-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
1387; CHECK:       for.cond1.preheader:
1388; CHECK-NEXT:    [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
1389; CHECK-NEXT:    [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1:%.*]], [[FOR_COND_CLEANUP3]] ]
1390; CHECK-NEXT:    br label [[FOR_COND1:%.*]]
1391; CHECK:       for.cond.cleanup:
1392; CHECK-NEXT:    ret i32 [[E_1]]
1393; CHECK:       for.cond1:
1394; CHECK-NEXT:    [[E_1]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
1395; CHECK-NEXT:    [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
1396; CHECK-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
1397; CHECK-NEXT:    [[DEC]] = add nsw i32 [[K_0]], -1
1398; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
1399; CHECK:       for.cond.cleanup3:
1400; CHECK-NEXT:    [[INC]] = add nuw nsw i32 [[I_016]], 1
1401; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
1402; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
1403;
1404; UNROLL-LABEL: @PR27246(
1405; UNROLL-NEXT:  entry:
1406; UNROLL-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
1407; UNROLL:       for.cond1.preheader:
1408; UNROLL-NEXT:    [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
1409; UNROLL-NEXT:    [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1:%.*]], [[FOR_COND_CLEANUP3]] ]
1410; UNROLL-NEXT:    br label [[FOR_COND1:%.*]]
1411; UNROLL:       for.cond.cleanup:
1412; UNROLL-NEXT:    ret i32 [[E_1]]
1413; UNROLL:       for.cond1:
1414; UNROLL-NEXT:    [[E_1]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
1415; UNROLL-NEXT:    [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
1416; UNROLL-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
1417; UNROLL-NEXT:    [[DEC]] = add nsw i32 [[K_0]], -1
1418; UNROLL-NEXT:    br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
1419; UNROLL:       for.cond.cleanup3:
1420; UNROLL-NEXT:    [[INC]] = add nuw nsw i32 [[I_016]], 1
1421; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
1422; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
1423;
1424; UNROLL-NO-IC-LABEL: @PR27246(
1425; UNROLL-NO-IC-NEXT:  entry:
1426; UNROLL-NO-IC-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
1427; UNROLL-NO-IC:       for.cond1.preheader:
1428; UNROLL-NO-IC-NEXT:    [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
1429; UNROLL-NO-IC-NEXT:    [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
1430; UNROLL-NO-IC-NEXT:    br label [[FOR_COND1:%.*]]
1431; UNROLL-NO-IC:       for.cond.cleanup:
1432; UNROLL-NO-IC-NEXT:    [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
1433; UNROLL-NO-IC-NEXT:    ret i32 [[E_1_LCSSA_LCSSA]]
1434; UNROLL-NO-IC:       for.cond1:
1435; UNROLL-NO-IC-NEXT:    [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
1436; UNROLL-NO-IC-NEXT:    [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
1437; UNROLL-NO-IC-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
1438; UNROLL-NO-IC-NEXT:    [[DEC]] = add nsw i32 [[K_0]], -1
1439; UNROLL-NO-IC-NEXT:    br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
1440; UNROLL-NO-IC:       for.cond.cleanup3:
1441; UNROLL-NO-IC-NEXT:    [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ]
1442; UNROLL-NO-IC-NEXT:    [[INC]] = add nuw nsw i32 [[I_016]], 1
1443; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
1444; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
1445;
1446; UNROLL-NO-VF-LABEL: @PR27246(
1447; UNROLL-NO-VF-NEXT:  entry:
1448; UNROLL-NO-VF-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
1449; UNROLL-NO-VF:       for.cond1.preheader:
1450; UNROLL-NO-VF-NEXT:    [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
1451; UNROLL-NO-VF-NEXT:    [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
1452; UNROLL-NO-VF-NEXT:    br label [[FOR_COND1:%.*]]
1453; UNROLL-NO-VF:       for.cond.cleanup:
1454; UNROLL-NO-VF-NEXT:    [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
1455; UNROLL-NO-VF-NEXT:    ret i32 [[E_1_LCSSA_LCSSA]]
1456; UNROLL-NO-VF:       for.cond1:
1457; UNROLL-NO-VF-NEXT:    [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
1458; UNROLL-NO-VF-NEXT:    [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
1459; UNROLL-NO-VF-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
1460; UNROLL-NO-VF-NEXT:    [[DEC]] = add nsw i32 [[K_0]], -1
1461; UNROLL-NO-VF-NEXT:    br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
1462; UNROLL-NO-VF:       for.cond.cleanup3:
1463; UNROLL-NO-VF-NEXT:    [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ]
1464; UNROLL-NO-VF-NEXT:    [[INC]] = add nuw nsw i32 [[I_016]], 1
1465; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
1466; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
1467;
1468; SINK-AFTER-LABEL: @PR27246(
1469; SINK-AFTER-NEXT:  entry:
1470; SINK-AFTER-NEXT:    br label [[FOR_COND1_PREHEADER:%.*]]
1471; SINK-AFTER:       for.cond1.preheader:
1472; SINK-AFTER-NEXT:    [[I_016:%.*]] = phi i32 [ 1, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_COND_CLEANUP3:%.*]] ]
1473; SINK-AFTER-NEXT:    [[E_015:%.*]] = phi i32 [ poison, [[ENTRY]] ], [ [[E_1_LCSSA:%.*]], [[FOR_COND_CLEANUP3]] ]
1474; SINK-AFTER-NEXT:    br label [[FOR_COND1:%.*]]
1475; SINK-AFTER:       for.cond.cleanup:
1476; SINK-AFTER-NEXT:    [[E_1_LCSSA_LCSSA:%.*]] = phi i32 [ [[E_1_LCSSA]], [[FOR_COND_CLEANUP3]] ]
1477; SINK-AFTER-NEXT:    ret i32 [[E_1_LCSSA_LCSSA]]
1478; SINK-AFTER:       for.cond1:
1479; SINK-AFTER-NEXT:    [[E_1:%.*]] = phi i32 [ [[K_0:%.*]], [[FOR_COND1]] ], [ [[E_015]], [[FOR_COND1_PREHEADER]] ]
1480; SINK-AFTER-NEXT:    [[K_0]] = phi i32 [ [[DEC:%.*]], [[FOR_COND1]] ], [ [[I_016]], [[FOR_COND1_PREHEADER]] ]
1481; SINK-AFTER-NEXT:    [[CMP2:%.*]] = icmp sgt i32 [[K_0]], 1
1482; SINK-AFTER-NEXT:    [[DEC]] = add nsw i32 [[K_0]], -1
1483; SINK-AFTER-NEXT:    br i1 [[CMP2]], label [[FOR_COND1]], label [[FOR_COND_CLEANUP3]]
1484; SINK-AFTER:       for.cond.cleanup3:
1485; SINK-AFTER-NEXT:    [[E_1_LCSSA]] = phi i32 [ [[E_1]], [[FOR_COND1]] ]
1486; SINK-AFTER-NEXT:    [[INC]] = add nuw nsw i32 [[I_016]], 1
1487; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC]], 49
1488; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP:%.*]], label [[FOR_COND1_PREHEADER]]
1489;
1490entry:
1491  br label %for.cond1.preheader
1492
1493for.cond1.preheader:
1494  %i.016 = phi i32 [ 1, %entry ], [ %inc, %for.cond.cleanup3 ]
1495  %e.015 = phi i32 [ poison, %entry ], [ %e.1.lcssa, %for.cond.cleanup3 ]
1496  br label %for.cond1
1497
1498for.cond.cleanup:
1499  %e.1.lcssa.lcssa = phi i32 [ %e.1.lcssa, %for.cond.cleanup3 ]
1500  ret i32 %e.1.lcssa.lcssa
1501
1502for.cond1:
1503  %e.1 = phi i32 [ %k.0, %for.cond1 ], [ %e.015, %for.cond1.preheader ]
1504  %k.0 = phi i32 [ %dec, %for.cond1 ], [ %i.016, %for.cond1.preheader ]
1505  %cmp2 = icmp sgt i32 %k.0, 1
1506  %dec = add nsw i32 %k.0, -1
1507  br i1 %cmp2, label %for.cond1, label %for.cond.cleanup3
1508
1509for.cond.cleanup3:
1510  %e.1.lcssa = phi i32 [ %e.1, %for.cond1 ]
1511  %inc = add nuw nsw i32 %i.016, 1
1512  %exitcond = icmp eq i32 %inc, 49
1513  br i1 %exitcond, label %for.cond.cleanup, label %for.cond1.preheader
1514}
1515
1516;
1517define void @PR30183(i32 %pre_load, i32* %a, i32* %b, i64 %n) {
1518; CHECK-LABEL: @PR30183(
1519; CHECK-NEXT:  entry:
1520; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -2
1521; CHECK-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
1522; CHECK-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
1523; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 6
1524; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1525; CHECK:       vector.ph:
1526; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], -4
1527; CHECK-NEXT:    [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
1528; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1529; CHECK:       vector.body:
1530; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1531; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1532; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1533; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
1534; CHECK:       middle.block:
1535; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1536; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1537; CHECK:       scalar.ph:
1538; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1539; CHECK-NEXT:    br label [[SCALAR_BODY:%.*]]
1540; CHECK:       scalar.body:
1541; CHECK-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1542; CHECK-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
1543; CHECK-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
1544; CHECK-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
1545; CHECK:       for.end:
1546; CHECK-NEXT:    ret void
1547;
1548; UNROLL-LABEL: @PR30183(
1549; UNROLL-NEXT:  entry:
1550; UNROLL-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -2
1551; UNROLL-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
1552; UNROLL-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
1553; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP0]], 14
1554; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1555; UNROLL:       vector.ph:
1556; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], -8
1557; UNROLL-NEXT:    [[IND_END:%.*]] = shl i64 [[N_VEC]], 1
1558; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
1559; UNROLL:       vector.body:
1560; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1561; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1562; UNROLL-NEXT:    [[TMP3:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1563; UNROLL-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
1564; UNROLL:       middle.block:
1565; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1566; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1567; UNROLL:       scalar.ph:
1568; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1569; UNROLL-NEXT:    br label [[SCALAR_BODY:%.*]]
1570; UNROLL:       scalar.body:
1571; UNROLL-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1572; UNROLL-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
1573; UNROLL-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
1574; UNROLL-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
1575; UNROLL:       for.end:
1576; UNROLL-NEXT:    ret void
1577;
1578; UNROLL-NO-IC-LABEL: @PR30183(
1579; UNROLL-NO-IC-NEXT:  entry:
1580; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -2
1581; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
1582; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
1583; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 8
1584; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1585; UNROLL-NO-IC:       vector.ph:
1586; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 8
1587; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
1588; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
1589; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD:%.*]], i32 3
1590; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
1591; UNROLL-NO-IC:       vector.body:
1592; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1593; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP42:%.*]], [[VECTOR_BODY]] ]
1594; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
1595; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
1596; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2
1597; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
1598; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6
1599; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add i64 [[OFFSET_IDX]], 8
1600; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add i64 [[OFFSET_IDX]], 10
1601; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = add i64 [[OFFSET_IDX]], 12
1602; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = add i64 [[OFFSET_IDX]], 14
1603; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = add nuw nsw i64 [[TMP3]], 2
1604; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = add nuw nsw i64 [[TMP4]], 2
1605; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = add nuw nsw i64 [[TMP5]], 2
1606; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = add nuw nsw i64 [[TMP6]], 2
1607; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = add nuw nsw i64 [[TMP7]], 2
1608; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = add nuw nsw i64 [[TMP8]], 2
1609; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = add nuw nsw i64 [[TMP9]], 2
1610; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = add nuw nsw i64 [[TMP10]], 2
1611; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP11]]
1612; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP12]]
1613; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP13]]
1614; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP14]]
1615; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP15]]
1616; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP16]]
1617; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP17]]
1618; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP18]]
1619; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = load i32, i32* [[TMP19]], align 4
1620; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = load i32, i32* [[TMP20]], align 4
1621; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = load i32, i32* [[TMP21]], align 4
1622; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = load i32, i32* [[TMP22]], align 4
1623; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = insertelement <4 x i32> poison, i32 [[TMP27]], i32 0
1624; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP28]], i32 1
1625; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP29]], i32 2
1626; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP30]], i32 3
1627; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = load i32, i32* [[TMP23]], align 4
1628; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = load i32, i32* [[TMP24]], align 4
1629; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = load i32, i32* [[TMP25]], align 4
1630; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = load i32, i32* [[TMP26]], align 4
1631; UNROLL-NO-IC-NEXT:    [[TMP39:%.*]] = insertelement <4 x i32> poison, i32 [[TMP35]], i32 0
1632; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP36]], i32 1
1633; UNROLL-NO-IC-NEXT:    [[TMP41:%.*]] = insertelement <4 x i32> [[TMP40]], i32 [[TMP37]], i32 2
1634; UNROLL-NO-IC-NEXT:    [[TMP42]] = insertelement <4 x i32> [[TMP41]], i32 [[TMP38]], i32 3
1635; UNROLL-NO-IC-NEXT:    [[TMP43:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP34]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1636; UNROLL-NO-IC-NEXT:    [[TMP44:%.*]] = shufflevector <4 x i32> [[TMP34]], <4 x i32> [[TMP42]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1637; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1638; UNROLL-NO-IC-NEXT:    [[TMP45:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1639; UNROLL-NO-IC-NEXT:    br i1 [[TMP45]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
1640; UNROLL-NO-IC:       middle.block:
1641; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1642; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP42]], i32 3
1643; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP42]], i32 2
1644; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1645; UNROLL-NO-IC:       scalar.ph:
1646; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
1647; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1648; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
1649; UNROLL-NO-IC:       scalar.body:
1650; UNROLL-NO-IC-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1651; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
1652; UNROLL-NO-IC-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
1653; UNROLL-NO-IC-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]]
1654; UNROLL-NO-IC-NEXT:    [[VAR2]] = load i32, i32* [[VAR1]], align 4
1655; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
1656; UNROLL-NO-IC-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
1657; UNROLL-NO-IC:       for.end:
1658; UNROLL-NO-IC-NEXT:    ret void
1659;
1660; UNROLL-NO-VF-LABEL: @PR30183(
1661; UNROLL-NO-VF-NEXT:  entry:
1662; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -2
1663; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
1664; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
1665; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 2
1666; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1667; UNROLL-NO-VF:       vector.ph:
1668; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 2
1669; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
1670; UNROLL-NO-VF-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
1671; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
1672; UNROLL-NO-VF:       vector.body:
1673; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1674; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ [[PRE_LOAD:%.*]], [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ]
1675; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
1676; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[OFFSET_IDX]], 0
1677; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[OFFSET_IDX]], 2
1678; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[INDUCTION]], 2
1679; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[INDUCTION1]], 2
1680; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP3]]
1681; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
1682; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = load i32, i32* [[TMP5]], align 4
1683; UNROLL-NO-VF-NEXT:    [[TMP8]] = load i32, i32* [[TMP6]], align 4
1684; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1685; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1686; UNROLL-NO-VF-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
1687; UNROLL-NO-VF:       middle.block:
1688; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1689; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1690; UNROLL-NO-VF:       scalar.ph:
1691; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
1692; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1693; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
1694; UNROLL-NO-VF:       scalar.body:
1695; UNROLL-NO-VF-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1696; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
1697; UNROLL-NO-VF-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
1698; UNROLL-NO-VF-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]]
1699; UNROLL-NO-VF-NEXT:    [[VAR2]] = load i32, i32* [[VAR1]], align 4
1700; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
1701; UNROLL-NO-VF-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
1702; UNROLL-NO-VF:       for.end:
1703; UNROLL-NO-VF-NEXT:    ret void
1704;
1705; SINK-AFTER-LABEL: @PR30183(
1706; SINK-AFTER-NEXT:  entry:
1707; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[N:%.*]], -2
1708; SINK-AFTER-NEXT:    [[TMP1:%.*]] = lshr i64 [[TMP0]], 1
1709; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add nuw i64 [[TMP1]], 1
1710; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[TMP2]], 4
1711; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1712; SINK-AFTER:       vector.ph:
1713; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[TMP2]], 4
1714; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[TMP2]], [[N_MOD_VF]]
1715; SINK-AFTER-NEXT:    [[IND_END:%.*]] = mul i64 [[N_VEC]], 2
1716; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i32> poison, i32 [[PRE_LOAD:%.*]], i32 3
1717; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
1718; SINK-AFTER:       vector.body:
1719; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1720; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[VECTOR_BODY]] ]
1721; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = mul i64 [[INDEX]], 2
1722; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i64 [[OFFSET_IDX]], 0
1723; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add i64 [[OFFSET_IDX]], 2
1724; SINK-AFTER-NEXT:    [[TMP5:%.*]] = add i64 [[OFFSET_IDX]], 4
1725; SINK-AFTER-NEXT:    [[TMP6:%.*]] = add i64 [[OFFSET_IDX]], 6
1726; SINK-AFTER-NEXT:    [[TMP7:%.*]] = add nuw nsw i64 [[TMP3]], 2
1727; SINK-AFTER-NEXT:    [[TMP8:%.*]] = add nuw nsw i64 [[TMP4]], 2
1728; SINK-AFTER-NEXT:    [[TMP9:%.*]] = add nuw nsw i64 [[TMP5]], 2
1729; SINK-AFTER-NEXT:    [[TMP10:%.*]] = add nuw nsw i64 [[TMP6]], 2
1730; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[TMP7]]
1731; SINK-AFTER-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP8]]
1732; SINK-AFTER-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP9]]
1733; SINK-AFTER-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP10]]
1734; SINK-AFTER-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP11]], align 4
1735; SINK-AFTER-NEXT:    [[TMP16:%.*]] = load i32, i32* [[TMP12]], align 4
1736; SINK-AFTER-NEXT:    [[TMP17:%.*]] = load i32, i32* [[TMP13]], align 4
1737; SINK-AFTER-NEXT:    [[TMP18:%.*]] = load i32, i32* [[TMP14]], align 4
1738; SINK-AFTER-NEXT:    [[TMP19:%.*]] = insertelement <4 x i32> poison, i32 [[TMP15]], i32 0
1739; SINK-AFTER-NEXT:    [[TMP20:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP16]], i32 1
1740; SINK-AFTER-NEXT:    [[TMP21:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP17]], i32 2
1741; SINK-AFTER-NEXT:    [[TMP22]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP18]], i32 3
1742; SINK-AFTER-NEXT:    [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1743; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1744; SINK-AFTER-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1745; SINK-AFTER-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
1746; SINK-AFTER:       middle.block:
1747; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1748; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3
1749; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP22]], i32 2
1750; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1751; SINK-AFTER:       scalar.ph:
1752; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ [[PRE_LOAD]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
1753; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1754; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
1755; SINK-AFTER:       scalar.body:
1756; SINK-AFTER-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1757; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR2:%.*]], [[SCALAR_BODY]] ]
1758; SINK-AFTER-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 2
1759; SINK-AFTER-NEXT:    [[VAR1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[I_NEXT]]
1760; SINK-AFTER-NEXT:    [[VAR2]] = load i32, i32* [[VAR1]], align 4
1761; SINK-AFTER-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], [[N]]
1762; SINK-AFTER-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
1763; SINK-AFTER:       for.end:
1764; SINK-AFTER-NEXT:    ret void
1765;
1766entry:
1767  br label %scalar.body
1768
1769scalar.body:
1770  %i = phi i64 [ 0, %entry ], [ %i.next, %scalar.body ]
1771  %var0 = phi i32 [ %pre_load, %entry ], [ %var2, %scalar.body ]
1772  %i.next = add nuw nsw i64 %i, 2
1773  %var1 = getelementptr inbounds i32, i32* %a, i64 %i.next
1774  %var2 = load i32, i32* %var1
1775  %cond = icmp eq i64 %i.next,%n
1776  br i1 %cond, label %for.end, label %scalar.body
1777
1778for.end:
1779  ret void
1780}
1781
1782;
1783define void @constant_folded_previous_value() {
1784; CHECK-LABEL: @constant_folded_previous_value(
1785; CHECK-NEXT:  entry:
1786; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1787; CHECK:       vector.ph:
1788; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1789; CHECK:       vector.body:
1790; CHECK-NEXT:    br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
1791; CHECK:       middle.block:
1792; CHECK-NEXT:    br i1 undef, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1793; CHECK:       scalar.ph:
1794; CHECK-NEXT:    br label [[SCALAR_BODY:%.*]]
1795; CHECK:       scalar.body:
1796; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
1797; CHECK:       for.end:
1798; CHECK-NEXT:    ret void
1799;
1800; UNROLL-LABEL: @constant_folded_previous_value(
1801; UNROLL-NEXT:  entry:
1802; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1803; UNROLL:       vector.ph:
1804; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
1805; UNROLL:       vector.body:
1806; UNROLL-NEXT:    br i1 undef, label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
1807; UNROLL:       middle.block:
1808; UNROLL-NEXT:    br i1 undef, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1809; UNROLL:       scalar.ph:
1810; UNROLL-NEXT:    br label [[SCALAR_BODY:%.*]]
1811; UNROLL:       scalar.body:
1812; UNROLL-NEXT:    br i1 undef, label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
1813; UNROLL:       for.end:
1814; UNROLL-NEXT:    ret void
1815;
1816; UNROLL-NO-IC-LABEL: @constant_folded_previous_value(
1817; UNROLL-NO-IC-NEXT:  entry:
1818; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1819; UNROLL-NO-IC:       vector.ph:
1820; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
1821; UNROLL-NO-IC:       vector.body:
1822; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1823; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ <i64 1, i64 1, i64 1, i64 1>, [[VECTOR_BODY]] ]
1824; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1825; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1826; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
1827; UNROLL-NO-IC-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
1828; UNROLL-NO-IC:       middle.block:
1829; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
1830; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1831; UNROLL-NO-IC:       scalar.ph:
1832; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 1, [[MIDDLE_BLOCK]] ]
1833; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1834; UNROLL-NO-IC-NEXT:    br label [[SCALAR_BODY:%.*]]
1835; UNROLL-NO-IC:       scalar.body:
1836; UNROLL-NO-IC-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1837; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
1838; UNROLL-NO-IC-NEXT:    [[VAR3]] = add i64 0, 1
1839; UNROLL-NO-IC-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1840; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef
1841; UNROLL-NO-IC-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
1842; UNROLL-NO-IC:       for.end:
1843; UNROLL-NO-IC-NEXT:    ret void
1844;
1845; UNROLL-NO-VF-LABEL: @constant_folded_previous_value(
1846; UNROLL-NO-VF-NEXT:  entry:
1847; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1848; UNROLL-NO-VF:       vector.ph:
1849; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
1850; UNROLL-NO-VF:       vector.body:
1851; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1852; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
1853; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i64 0, 1
1854; UNROLL-NO-VF-NEXT:    [[TMP1]] = add i64 0, 1
1855; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1856; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
1857; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
1858; UNROLL-NO-VF:       middle.block:
1859; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
1860; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1861; UNROLL-NO-VF:       scalar.ph:
1862; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
1863; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1864; UNROLL-NO-VF-NEXT:    br label [[SCALAR_BODY:%.*]]
1865; UNROLL-NO-VF:       scalar.body:
1866; UNROLL-NO-VF-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1867; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
1868; UNROLL-NO-VF-NEXT:    [[VAR3]] = add i64 0, 1
1869; UNROLL-NO-VF-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1870; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef
1871; UNROLL-NO-VF-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
1872; UNROLL-NO-VF:       for.end:
1873; UNROLL-NO-VF-NEXT:    ret void
1874;
1875; SINK-AFTER-LABEL: @constant_folded_previous_value(
1876; SINK-AFTER-NEXT:  entry:
1877; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1878; SINK-AFTER:       vector.ph:
1879; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
1880; SINK-AFTER:       vector.body:
1881; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1882; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i64> [ <i64 poison, i64 poison, i64 poison, i64 0>, [[VECTOR_PH]] ], [ <i64 1, i64 1, i64 1, i64 1>, [[VECTOR_BODY]] ]
1883; SINK-AFTER-NEXT:    [[TMP0:%.*]] = shufflevector <4 x i64> [[VECTOR_RECUR]], <4 x i64> <i64 1, i64 1, i64 1, i64 1>, <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1884; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1885; SINK-AFTER-NEXT:    [[TMP1:%.*]] = icmp eq i64 [[INDEX_NEXT]], undef
1886; SINK-AFTER-NEXT:    br i1 [[TMP1]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
1887; SINK-AFTER:       middle.block:
1888; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 undef, undef
1889; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1890; SINK-AFTER:       scalar.ph:
1891; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ 1, [[MIDDLE_BLOCK]] ]
1892; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ undef, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
1893; SINK-AFTER-NEXT:    br label [[SCALAR_BODY:%.*]]
1894; SINK-AFTER:       scalar.body:
1895; SINK-AFTER-NEXT:    [[I:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[I_NEXT:%.*]], [[SCALAR_BODY]] ]
1896; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i64 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[VAR3:%.*]], [[SCALAR_BODY]] ]
1897; SINK-AFTER-NEXT:    [[VAR3]] = add i64 0, 1
1898; SINK-AFTER-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1899; SINK-AFTER-NEXT:    [[COND:%.*]] = icmp eq i64 [[I_NEXT]], undef
1900; SINK-AFTER-NEXT:    br i1 [[COND]], label [[FOR_END]], label [[SCALAR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
1901; SINK-AFTER:       for.end:
1902; SINK-AFTER-NEXT:    ret void
1903;
1904entry:
1905  br label %scalar.body
1906
1907scalar.body:
1908  %i = phi i64 [ 0, %entry ], [ %i.next, %scalar.body ]
1909  %var2 = phi i64 [ 0, %entry ], [ %var3, %scalar.body ]
1910  %var3 = add i64 0, 1
1911  %i.next = add nuw nsw i64 %i, 1
1912  %cond = icmp eq i64 %i.next, undef
1913  br i1 %cond, label %for.end, label %scalar.body
1914
1915for.end:
1916  ret void
1917}
1918
1919; We vectorize this first order recurrence, by generating two
1920; extracts for the phi `val.phi` - one at the last index and
1921; another at the second last index. We need these 2 extracts because
1922; the first order recurrence phi is used outside the loop, so we require the phi
1923; itself and not its update (addx).
1924; Check the case when unrolled but not vectorized.
1925define i32 @extract_second_last_iteration(i32* %cval, i32 %x)  {
1926; CHECK-LABEL: @extract_second_last_iteration(
1927; CHECK-NEXT:  entry:
1928; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1929; CHECK:       vector.ph:
1930; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
1931; CHECK:       vector.body:
1932; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1933; CHECK-NEXT:    [[TMP0:%.*]] = phi i32 [ 2, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
1934; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
1935; CHECK-NEXT:    [[TMP1]] = add i32 [[TMP0]], 4
1936; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
1937; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
1938; CHECK:       middle.block:
1939; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = add i32 [[TMP0]], [[X:%.*]]
1940; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1941; CHECK:       scalar.ph:
1942; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
1943; CHECK:       for.body:
1944; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
1945; CHECK:       for.end:
1946; CHECK-NEXT:    [[VAL_PHI_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
1947; CHECK-NEXT:    ret i32 [[VAL_PHI_LCSSA]]
1948;
1949; UNROLL-LABEL: @extract_second_last_iteration(
1950; UNROLL-NEXT:  entry:
1951; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1952; UNROLL:       vector.ph:
1953; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
1954; UNROLL:       vector.body:
1955; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1956; UNROLL-NEXT:    [[TMP0:%.*]] = phi i32 [ 2, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
1957; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
1958; UNROLL-NEXT:    [[TMP1]] = add i32 [[TMP0]], 8
1959; UNROLL-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
1960; UNROLL-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
1961; UNROLL:       middle.block:
1962; UNROLL-NEXT:    [[TMP3:%.*]] = add i32 [[TMP0]], 4
1963; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = add i32 [[TMP3]], [[X:%.*]]
1964; UNROLL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
1965; UNROLL:       scalar.ph:
1966; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
1967; UNROLL:       for.body:
1968; UNROLL-NEXT:    br i1 undef, label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
1969; UNROLL:       for.end:
1970; UNROLL-NEXT:    [[VAL_PHI_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
1971; UNROLL-NEXT:    ret i32 [[VAL_PHI_LCSSA]]
1972;
1973; UNROLL-NO-IC-LABEL: @extract_second_last_iteration(
1974; UNROLL-NO-IC-NEXT:  entry:
1975; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1976; UNROLL-NO-IC:       vector.ph:
1977; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
1978; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
1979; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[X]], i32 0
1980; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
1981; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
1982; UNROLL-NO-IC:       vector.body:
1983; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1984; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1985; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
1986; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
1987; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
1988; UNROLL-NO-IC-NEXT:    [[TMP1]] = add <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT3]]
1989; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP0]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1990; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i32> [[TMP0]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
1991; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
1992; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 4, i32 4, i32 4, i32 4>
1993; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
1994; UNROLL-NO-IC-NEXT:    br i1 [[TMP4]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
1995; UNROLL-NO-IC:       middle.block:
1996; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 96, 96
1997; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
1998; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
1999; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2000; UNROLL-NO-IC:       scalar.ph:
2001; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2002; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2003; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
2004; UNROLL-NO-IC:       for.body:
2005; UNROLL-NO-IC-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
2006; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ]
2007; UNROLL-NO-IC-NEXT:    [[INC]] = add i32 [[INC_PHI]], 1
2008; UNROLL-NO-IC-NEXT:    [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
2009; UNROLL-NO-IC-NEXT:    [[ADDX]] = add i32 [[INC_PHI]], [[X]]
2010; UNROLL-NO-IC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
2011; UNROLL-NO-IC-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
2012; UNROLL-NO-IC:       for.end:
2013; UNROLL-NO-IC-NEXT:    [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
2014; UNROLL-NO-IC-NEXT:    ret i32 [[VAL_PHI_LCSSA]]
2015;
2016; UNROLL-NO-VF-LABEL: @extract_second_last_iteration(
2017; UNROLL-NO-VF-NEXT:  entry:
2018; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2019; UNROLL-NO-VF:       vector.ph:
2020; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
2021; UNROLL-NO-VF:       vector.body:
2022; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2023; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
2024; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i32 [[INDEX]], 0
2025; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i32 [[INDEX]], 1
2026; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[INDUCTION]], [[X:%.*]]
2027; UNROLL-NO-VF-NEXT:    [[TMP1]] = add i32 [[INDUCTION1]], [[X]]
2028; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
2029; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
2030; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
2031; UNROLL-NO-VF:       middle.block:
2032; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 96, 96
2033; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2034; UNROLL-NO-VF:       scalar.ph:
2035; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP1]], [[MIDDLE_BLOCK]] ]
2036; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2037; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
2038; UNROLL-NO-VF:       for.body:
2039; UNROLL-NO-VF-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
2040; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ]
2041; UNROLL-NO-VF-NEXT:    [[INC]] = add i32 [[INC_PHI]], 1
2042; UNROLL-NO-VF-NEXT:    [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
2043; UNROLL-NO-VF-NEXT:    [[ADDX]] = add i32 [[INC_PHI]], [[X]]
2044; UNROLL-NO-VF-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
2045; UNROLL-NO-VF-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
2046; UNROLL-NO-VF:       for.end:
2047; UNROLL-NO-VF-NEXT:    [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[TMP0]], [[MIDDLE_BLOCK]] ]
2048; UNROLL-NO-VF-NEXT:    ret i32 [[VAL_PHI_LCSSA]]
2049;
2050; SINK-AFTER-LABEL: @extract_second_last_iteration(
2051; SINK-AFTER-NEXT:  entry:
2052; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2053; SINK-AFTER:       vector.ph:
2054; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[X:%.*]], i32 0
2055; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
2056; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
2057; SINK-AFTER:       vector.body:
2058; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2059; SINK-AFTER-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
2060; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP0:%.*]], [[VECTOR_BODY]] ]
2061; SINK-AFTER-NEXT:    [[TMP0]] = add <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
2062; SINK-AFTER-NEXT:    [[TMP1:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP0]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2063; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
2064; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
2065; SINK-AFTER-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 96
2066; SINK-AFTER-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
2067; SINK-AFTER:       middle.block:
2068; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i32 96, 96
2069; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP0]], i32 3
2070; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP0]], i32 2
2071; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2072; SINK-AFTER:       scalar.ph:
2073; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2074; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ 96, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2075; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
2076; SINK-AFTER:       for.body:
2077; SINK-AFTER-NEXT:    [[INC_PHI:%.*]] = phi i32 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INC:%.*]], [[FOR_BODY]] ]
2078; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[ADDX:%.*]], [[FOR_BODY]] ]
2079; SINK-AFTER-NEXT:    [[INC]] = add i32 [[INC_PHI]], 1
2080; SINK-AFTER-NEXT:    [[BC:%.*]] = zext i32 [[INC_PHI]] to i64
2081; SINK-AFTER-NEXT:    [[ADDX]] = add i32 [[INC_PHI]], [[X]]
2082; SINK-AFTER-NEXT:    [[CMP:%.*]] = icmp eq i32 [[INC_PHI]], 95
2083; SINK-AFTER-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
2084; SINK-AFTER:       for.end:
2085; SINK-AFTER-NEXT:    [[VAL_PHI_LCSSA:%.*]] = phi i32 [ [[SCALAR_RECUR]], [[FOR_BODY]] ], [ [[VECTOR_RECUR_EXTRACT_FOR_PHI]], [[MIDDLE_BLOCK]] ]
2086; SINK-AFTER-NEXT:    ret i32 [[VAL_PHI_LCSSA]]
2087;
2088entry:
2089  br label %for.body
2090
2091for.body:
2092  %inc.phi = phi i32 [ 0, %entry ], [ %inc, %for.body ]
2093  %val.phi = phi i32 [ 0, %entry ], [ %addx, %for.body ]
2094  %inc = add i32 %inc.phi, 1
2095  %bc = zext i32 %inc.phi to i64
2096  %addx = add i32 %inc.phi, %x
2097  %cmp = icmp eq i32 %inc.phi, 95
2098  br i1 %cmp, label %for.end, label %for.body
2099
2100for.end:
2101  ret i32 %val.phi
2102}
2103
2104; We vectorize this first order recurrence, with a set of insertelements for
2105; each unrolled part. Make sure these insertelements are generated in-order,
2106; because the shuffle of the first order recurrence will be added after the
2107; insertelement of the last part UF - 1, assuming the latter appears after the
2108; insertelements of all other parts.
2109;
2110; int PR33613(double *b, double j, int d) {
2111;   int a = 0;
2112;   for(int i = 0; i < 10240; i++, b+=25) {
2113;     double f = b[d]; // Scalarize to form insertelements
2114;     if (j * f)
2115;       a++;
2116;     j = f;
2117;   }
2118;   return a;
2119; }
2120;
2121;
2122define i32 @PR33613(double* %b, double %j, i32 %d) {
2123; CHECK-LABEL: @PR33613(
2124; CHECK-NEXT:  entry:
2125; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
2126; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2127; CHECK:       vector.ph:
2128; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i64 3
2129; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2130; CHECK:       vector.body:
2131; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2132; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP23:%.*]], [[VECTOR_BODY]] ]
2133; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
2134; CHECK-NEXT:    [[TMP0:%.*]] = mul i64 [[INDEX]], 25
2135; CHECK-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr double, double* [[B:%.*]], i64 [[IDXPROM]]
2136; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 1
2137; CHECK-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 25
2138; CHECK-NEXT:    [[NEXT_GEP26:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2139; CHECK-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 2
2140; CHECK-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 25
2141; CHECK-NEXT:    [[NEXT_GEP37:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2142; CHECK-NEXT:    [[TMP5:%.*]] = or i64 [[INDEX]], 3
2143; CHECK-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 25
2144; CHECK-NEXT:    [[NEXT_GEP48:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2145; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr double, double* [[NEXT_GEP5]], i64 [[TMP0]]
2146; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr double, double* [[NEXT_GEP26]], i64 [[TMP2]]
2147; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr double, double* [[NEXT_GEP37]], i64 [[TMP4]]
2148; CHECK-NEXT:    [[TMP10:%.*]] = getelementptr double, double* [[NEXT_GEP48]], i64 [[TMP6]]
2149; CHECK-NEXT:    [[TMP11:%.*]] = load double, double* [[TMP7]], align 8
2150; CHECK-NEXT:    [[TMP12:%.*]] = load double, double* [[TMP8]], align 8
2151; CHECK-NEXT:    [[TMP13:%.*]] = load double, double* [[TMP9]], align 8
2152; CHECK-NEXT:    [[TMP14:%.*]] = load double, double* [[TMP10]], align 8
2153; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x double> poison, double [[TMP11]], i64 0
2154; CHECK-NEXT:    [[TMP16:%.*]] = insertelement <4 x double> [[TMP15]], double [[TMP12]], i64 1
2155; CHECK-NEXT:    [[TMP17:%.*]] = insertelement <4 x double> [[TMP16]], double [[TMP13]], i64 2
2156; CHECK-NEXT:    [[TMP18]] = insertelement <4 x double> [[TMP17]], double [[TMP14]], i64 3
2157; CHECK-NEXT:    [[TMP19:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP17]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2158; CHECK-NEXT:    [[TMP20:%.*]] = fmul <4 x double> [[TMP19]], [[TMP18]]
2159; CHECK-NEXT:    [[TMP21:%.*]] = fcmp une <4 x double> [[TMP20]], zeroinitializer
2160; CHECK-NEXT:    [[TMP22:%.*]] = zext <4 x i1> [[TMP21]] to <4 x i32>
2161; CHECK-NEXT:    [[TMP23]] = add <4 x i32> [[VEC_PHI]], [[TMP22]]
2162; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2163; CHECK-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
2164; CHECK-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
2165; CHECK:       middle.block:
2166; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP23]])
2167; CHECK-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
2168; CHECK:       scalar.ph:
2169; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
2170; CHECK:       for.cond.cleanup:
2171; CHECK-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ]
2172; CHECK-NEXT:    ret i32 [[A_1_LCSSA]]
2173; CHECK:       for.body:
2174; CHECK-NEXT:    br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2175;
2176; UNROLL-LABEL: @PR33613(
2177; UNROLL-NEXT:  entry:
2178; UNROLL-NEXT:    [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
2179; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2180; UNROLL:       vector.ph:
2181; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i64 3
2182; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
2183; UNROLL:       vector.body:
2184; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2185; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[VECTOR_BODY]] ]
2186; UNROLL-NEXT:    [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[VECTOR_BODY]] ]
2187; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP38:%.*]], [[VECTOR_BODY]] ]
2188; UNROLL-NEXT:    [[TMP0:%.*]] = mul i64 [[INDEX]], 25
2189; UNROLL-NEXT:    [[NEXT_GEP10:%.*]] = getelementptr double, double* [[B:%.*]], i64 [[IDXPROM]]
2190; UNROLL-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 1
2191; UNROLL-NEXT:    [[TMP2:%.*]] = mul i64 [[TMP1]], 25
2192; UNROLL-NEXT:    [[NEXT_GEP211:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2193; UNROLL-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 2
2194; UNROLL-NEXT:    [[TMP4:%.*]] = mul i64 [[TMP3]], 25
2195; UNROLL-NEXT:    [[NEXT_GEP312:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2196; UNROLL-NEXT:    [[TMP5:%.*]] = or i64 [[INDEX]], 3
2197; UNROLL-NEXT:    [[TMP6:%.*]] = mul i64 [[TMP5]], 25
2198; UNROLL-NEXT:    [[NEXT_GEP413:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2199; UNROLL-NEXT:    [[TMP7:%.*]] = or i64 [[INDEX]], 4
2200; UNROLL-NEXT:    [[TMP8:%.*]] = mul i64 [[TMP7]], 25
2201; UNROLL-NEXT:    [[NEXT_GEP514:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2202; UNROLL-NEXT:    [[TMP9:%.*]] = or i64 [[INDEX]], 5
2203; UNROLL-NEXT:    [[TMP10:%.*]] = mul i64 [[TMP9]], 25
2204; UNROLL-NEXT:    [[NEXT_GEP615:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2205; UNROLL-NEXT:    [[TMP11:%.*]] = or i64 [[INDEX]], 6
2206; UNROLL-NEXT:    [[TMP12:%.*]] = mul i64 [[TMP11]], 25
2207; UNROLL-NEXT:    [[NEXT_GEP716:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2208; UNROLL-NEXT:    [[TMP13:%.*]] = or i64 [[INDEX]], 7
2209; UNROLL-NEXT:    [[TMP14:%.*]] = mul i64 [[TMP13]], 25
2210; UNROLL-NEXT:    [[NEXT_GEP817:%.*]] = getelementptr double, double* [[B]], i64 [[IDXPROM]]
2211; UNROLL-NEXT:    [[TMP15:%.*]] = getelementptr double, double* [[NEXT_GEP10]], i64 [[TMP0]]
2212; UNROLL-NEXT:    [[TMP16:%.*]] = getelementptr double, double* [[NEXT_GEP211]], i64 [[TMP2]]
2213; UNROLL-NEXT:    [[TMP17:%.*]] = getelementptr double, double* [[NEXT_GEP312]], i64 [[TMP4]]
2214; UNROLL-NEXT:    [[TMP18:%.*]] = getelementptr double, double* [[NEXT_GEP413]], i64 [[TMP6]]
2215; UNROLL-NEXT:    [[TMP19:%.*]] = getelementptr double, double* [[NEXT_GEP514]], i64 [[TMP8]]
2216; UNROLL-NEXT:    [[TMP20:%.*]] = getelementptr double, double* [[NEXT_GEP615]], i64 [[TMP10]]
2217; UNROLL-NEXT:    [[TMP21:%.*]] = getelementptr double, double* [[NEXT_GEP716]], i64 [[TMP12]]
2218; UNROLL-NEXT:    [[TMP22:%.*]] = getelementptr double, double* [[NEXT_GEP817]], i64 [[TMP14]]
2219; UNROLL-NEXT:    [[TMP23:%.*]] = load double, double* [[TMP15]], align 8
2220; UNROLL-NEXT:    [[TMP24:%.*]] = load double, double* [[TMP16]], align 8
2221; UNROLL-NEXT:    [[TMP25:%.*]] = load double, double* [[TMP17]], align 8
2222; UNROLL-NEXT:    [[TMP26:%.*]] = load double, double* [[TMP18]], align 8
2223; UNROLL-NEXT:    [[TMP27:%.*]] = insertelement <4 x double> poison, double [[TMP23]], i64 0
2224; UNROLL-NEXT:    [[TMP28:%.*]] = insertelement <4 x double> [[TMP27]], double [[TMP24]], i64 1
2225; UNROLL-NEXT:    [[TMP29:%.*]] = insertelement <4 x double> [[TMP28]], double [[TMP25]], i64 2
2226; UNROLL-NEXT:    [[TMP30:%.*]] = insertelement <4 x double> [[TMP29]], double [[TMP26]], i64 3
2227; UNROLL-NEXT:    [[TMP31:%.*]] = load double, double* [[TMP19]], align 8
2228; UNROLL-NEXT:    [[TMP32:%.*]] = load double, double* [[TMP20]], align 8
2229; UNROLL-NEXT:    [[TMP33:%.*]] = load double, double* [[TMP21]], align 8
2230; UNROLL-NEXT:    [[TMP34:%.*]] = load double, double* [[TMP22]], align 8
2231; UNROLL-NEXT:    [[TMP35:%.*]] = insertelement <4 x double> poison, double [[TMP31]], i64 0
2232; UNROLL-NEXT:    [[TMP36:%.*]] = insertelement <4 x double> [[TMP35]], double [[TMP32]], i64 1
2233; UNROLL-NEXT:    [[TMP37:%.*]] = insertelement <4 x double> [[TMP36]], double [[TMP33]], i64 2
2234; UNROLL-NEXT:    [[TMP38]] = insertelement <4 x double> [[TMP37]], double [[TMP34]], i64 3
2235; UNROLL-NEXT:    [[TMP39:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP29]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2236; UNROLL-NEXT:    [[TMP40:%.*]] = shufflevector <4 x double> [[TMP30]], <4 x double> [[TMP37]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2237; UNROLL-NEXT:    [[TMP41:%.*]] = fmul <4 x double> [[TMP39]], [[TMP30]]
2238; UNROLL-NEXT:    [[TMP42:%.*]] = fmul <4 x double> [[TMP40]], [[TMP38]]
2239; UNROLL-NEXT:    [[TMP43:%.*]] = fcmp une <4 x double> [[TMP41]], zeroinitializer
2240; UNROLL-NEXT:    [[TMP44:%.*]] = fcmp une <4 x double> [[TMP42]], zeroinitializer
2241; UNROLL-NEXT:    [[TMP45:%.*]] = zext <4 x i1> [[TMP43]] to <4 x i32>
2242; UNROLL-NEXT:    [[TMP46:%.*]] = zext <4 x i1> [[TMP44]] to <4 x i32>
2243; UNROLL-NEXT:    [[TMP47]] = add <4 x i32> [[VEC_PHI]], [[TMP45]]
2244; UNROLL-NEXT:    [[TMP48]] = add <4 x i32> [[VEC_PHI9]], [[TMP46]]
2245; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2246; UNROLL-NEXT:    [[TMP49:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
2247; UNROLL-NEXT:    br i1 [[TMP49]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
2248; UNROLL:       middle.block:
2249; UNROLL-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP48]], [[TMP47]]
2250; UNROLL-NEXT:    [[TMP50:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
2251; UNROLL-NEXT:    br i1 true, label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
2252; UNROLL:       scalar.ph:
2253; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
2254; UNROLL:       for.cond.cleanup:
2255; UNROLL-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ undef, [[FOR_BODY]] ], [ [[TMP50]], [[MIDDLE_BLOCK]] ]
2256; UNROLL-NEXT:    ret i32 [[A_1_LCSSA]]
2257; UNROLL:       for.body:
2258; UNROLL-NEXT:    br i1 undef, label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2259;
2260; UNROLL-NO-IC-LABEL: @PR33613(
2261; UNROLL-NO-IC-NEXT:  entry:
2262; UNROLL-NO-IC-NEXT:    [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
2263; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2264; UNROLL-NO-IC:       vector.ph:
2265; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = getelementptr double, double* [[B:%.*]], i64 256000
2266; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i32 3
2267; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
2268; UNROLL-NO-IC:       vector.body:
2269; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2270; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP48:%.*]], [[VECTOR_BODY]] ]
2271; UNROLL-NO-IC-NEXT:    [[VEC_PHI9:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP49:%.*]], [[VECTOR_BODY]] ]
2272; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP39:%.*]], [[VECTOR_BODY]] ]
2273; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2274; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 25
2275; UNROLL-NO-IC-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]]
2276; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
2277; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 25
2278; UNROLL-NO-IC-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]]
2279; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 2
2280; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 25
2281; UNROLL-NO-IC-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[TMP5]]
2282; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 3
2283; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 25
2284; UNROLL-NO-IC-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[TMP7]]
2285; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add i64 [[INDEX]], 4
2286; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = mul i64 [[TMP8]], 25
2287; UNROLL-NO-IC-NEXT:    [[NEXT_GEP5:%.*]] = getelementptr double, double* [[B]], i64 [[TMP9]]
2288; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = add i64 [[INDEX]], 5
2289; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = mul i64 [[TMP10]], 25
2290; UNROLL-NO-IC-NEXT:    [[NEXT_GEP6:%.*]] = getelementptr double, double* [[B]], i64 [[TMP11]]
2291; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = add i64 [[INDEX]], 6
2292; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = mul i64 [[TMP12]], 25
2293; UNROLL-NO-IC-NEXT:    [[NEXT_GEP7:%.*]] = getelementptr double, double* [[B]], i64 [[TMP13]]
2294; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = add i64 [[INDEX]], 7
2295; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = mul i64 [[TMP14]], 25
2296; UNROLL-NO-IC-NEXT:    [[NEXT_GEP8:%.*]] = getelementptr double, double* [[B]], i64 [[TMP15]]
2297; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]]
2298; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]]
2299; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]]
2300; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[IDXPROM]]
2301; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP5]], i64 [[IDXPROM]]
2302; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP6]], i64 [[IDXPROM]]
2303; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP7]], i64 [[IDXPROM]]
2304; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP8]], i64 [[IDXPROM]]
2305; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = load double, double* [[TMP16]], align 8
2306; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = load double, double* [[TMP17]], align 8
2307; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = load double, double* [[TMP18]], align 8
2308; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = load double, double* [[TMP19]], align 8
2309; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = insertelement <4 x double> poison, double [[TMP24]], i32 0
2310; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = insertelement <4 x double> [[TMP28]], double [[TMP25]], i32 1
2311; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = insertelement <4 x double> [[TMP29]], double [[TMP26]], i32 2
2312; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = insertelement <4 x double> [[TMP30]], double [[TMP27]], i32 3
2313; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = load double, double* [[TMP20]], align 8
2314; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = load double, double* [[TMP21]], align 8
2315; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = load double, double* [[TMP22]], align 8
2316; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = load double, double* [[TMP23]], align 8
2317; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = insertelement <4 x double> poison, double [[TMP32]], i32 0
2318; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = insertelement <4 x double> [[TMP36]], double [[TMP33]], i32 1
2319; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = insertelement <4 x double> [[TMP37]], double [[TMP34]], i32 2
2320; UNROLL-NO-IC-NEXT:    [[TMP39]] = insertelement <4 x double> [[TMP38]], double [[TMP35]], i32 3
2321; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP31]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2322; UNROLL-NO-IC-NEXT:    [[TMP41:%.*]] = shufflevector <4 x double> [[TMP31]], <4 x double> [[TMP39]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2323; UNROLL-NO-IC-NEXT:    [[TMP42:%.*]] = fmul <4 x double> [[TMP40]], [[TMP31]]
2324; UNROLL-NO-IC-NEXT:    [[TMP43:%.*]] = fmul <4 x double> [[TMP41]], [[TMP39]]
2325; UNROLL-NO-IC-NEXT:    [[TMP44:%.*]] = fcmp une <4 x double> [[TMP42]], zeroinitializer
2326; UNROLL-NO-IC-NEXT:    [[TMP45:%.*]] = fcmp une <4 x double> [[TMP43]], zeroinitializer
2327; UNROLL-NO-IC-NEXT:    [[TMP46:%.*]] = zext <4 x i1> [[TMP44]] to <4 x i32>
2328; UNROLL-NO-IC-NEXT:    [[TMP47:%.*]] = zext <4 x i1> [[TMP45]] to <4 x i32>
2329; UNROLL-NO-IC-NEXT:    [[TMP48]] = add <4 x i32> [[VEC_PHI]], [[TMP46]]
2330; UNROLL-NO-IC-NEXT:    [[TMP49]] = add <4 x i32> [[VEC_PHI9]], [[TMP47]]
2331; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2332; UNROLL-NO-IC-NEXT:    [[TMP50:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
2333; UNROLL-NO-IC-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
2334; UNROLL-NO-IC:       middle.block:
2335; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]]
2336; UNROLL-NO-IC-NEXT:    [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
2337; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10240, 10240
2338; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP39]], i32 3
2339; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[TMP39]], i32 2
2340; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
2341; UNROLL-NO-IC:       scalar.ph:
2342; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2343; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
2344; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2345; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
2346; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
2347; UNROLL-NO-IC:       for.cond.cleanup:
2348; UNROLL-NO-IC-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
2349; UNROLL-NO-IC-NEXT:    ret i32 [[A_1_LCSSA]]
2350; UNROLL-NO-IC:       for.body:
2351; UNROLL-NO-IC-NEXT:    [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
2352; UNROLL-NO-IC-NEXT:    [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
2353; UNROLL-NO-IC-NEXT:    [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
2354; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP52:%.*]], [[FOR_BODY]] ]
2355; UNROLL-NO-IC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]]
2356; UNROLL-NO-IC-NEXT:    [[TMP52]] = load double, double* [[ARRAYIDX]], align 8
2357; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP52]]
2358; UNROLL-NO-IC-NEXT:    [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
2359; UNROLL-NO-IC-NEXT:    [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
2360; UNROLL-NO-IC-NEXT:    [[A_1]] = add nsw i32 [[A_010]], [[INC]]
2361; UNROLL-NO-IC-NEXT:    [[INC1]] = add nuw nsw i32 [[I_011]], 1
2362; UNROLL-NO-IC-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25
2363; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
2364; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2365;
2366; UNROLL-NO-VF-LABEL: @PR33613(
2367; UNROLL-NO-VF-NEXT:  entry:
2368; UNROLL-NO-VF-NEXT:    [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
2369; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2370; UNROLL-NO-VF:       vector.ph:
2371; UNROLL-NO-VF-NEXT:    [[IND_END:%.*]] = getelementptr double, double* [[B:%.*]], i64 256000
2372; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
2373; UNROLL-NO-VF:       vector.body:
2374; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2375; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
2376; UNROLL-NO-VF-NEXT:    [[VEC_PHI3:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP15:%.*]], [[VECTOR_BODY]] ]
2377; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi double [ [[J:%.*]], [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
2378; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2379; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 25
2380; UNROLL-NO-VF-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]]
2381; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
2382; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 25
2383; UNROLL-NO-VF-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]]
2384; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]]
2385; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]]
2386; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = load double, double* [[TMP4]], align 8
2387; UNROLL-NO-VF-NEXT:    [[TMP7]] = load double, double* [[TMP5]], align 8
2388; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = fmul double [[VECTOR_RECUR]], [[TMP6]]
2389; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = fmul double [[TMP6]], [[TMP7]]
2390; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = fcmp une double [[TMP8]], 0.000000e+00
2391; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = fcmp une double [[TMP9]], 0.000000e+00
2392; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = zext i1 [[TMP10]] to i32
2393; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = zext i1 [[TMP11]] to i32
2394; UNROLL-NO-VF-NEXT:    [[TMP14]] = add i32 [[VEC_PHI]], [[TMP12]]
2395; UNROLL-NO-VF-NEXT:    [[TMP15]] = add i32 [[VEC_PHI3]], [[TMP13]]
2396; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2397; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
2398; UNROLL-NO-VF-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP13:![0-9]+]]
2399; UNROLL-NO-VF:       middle.block:
2400; UNROLL-NO-VF-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP15]], [[TMP14]]
2401; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10240, 10240
2402; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
2403; UNROLL-NO-VF:       scalar.ph:
2404; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
2405; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
2406; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2407; UNROLL-NO-VF-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
2408; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
2409; UNROLL-NO-VF:       for.cond.cleanup:
2410; UNROLL-NO-VF-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
2411; UNROLL-NO-VF-NEXT:    ret i32 [[A_1_LCSSA]]
2412; UNROLL-NO-VF:       for.body:
2413; UNROLL-NO-VF-NEXT:    [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
2414; UNROLL-NO-VF-NEXT:    [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
2415; UNROLL-NO-VF-NEXT:    [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
2416; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ]
2417; UNROLL-NO-VF-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]]
2418; UNROLL-NO-VF-NEXT:    [[TMP17]] = load double, double* [[ARRAYIDX]], align 8
2419; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP17]]
2420; UNROLL-NO-VF-NEXT:    [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
2421; UNROLL-NO-VF-NEXT:    [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
2422; UNROLL-NO-VF-NEXT:    [[A_1]] = add nsw i32 [[A_010]], [[INC]]
2423; UNROLL-NO-VF-NEXT:    [[INC1]] = add nuw nsw i32 [[I_011]], 1
2424; UNROLL-NO-VF-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25
2425; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
2426; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
2427;
2428; SINK-AFTER-LABEL: @PR33613(
2429; SINK-AFTER-NEXT:  entry:
2430; SINK-AFTER-NEXT:    [[IDXPROM:%.*]] = sext i32 [[D:%.*]] to i64
2431; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2432; SINK-AFTER:       vector.ph:
2433; SINK-AFTER-NEXT:    [[IND_END:%.*]] = getelementptr double, double* [[B:%.*]], i64 256000
2434; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x double> poison, double [[J:%.*]], i32 3
2435; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
2436; SINK-AFTER:       vector.body:
2437; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2438; SINK-AFTER-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[VECTOR_BODY]] ]
2439; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x double> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ]
2440; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2441; SINK-AFTER-NEXT:    [[TMP1:%.*]] = mul i64 [[TMP0]], 25
2442; SINK-AFTER-NEXT:    [[NEXT_GEP:%.*]] = getelementptr double, double* [[B]], i64 [[TMP1]]
2443; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 1
2444; SINK-AFTER-NEXT:    [[TMP3:%.*]] = mul i64 [[TMP2]], 25
2445; SINK-AFTER-NEXT:    [[NEXT_GEP2:%.*]] = getelementptr double, double* [[B]], i64 [[TMP3]]
2446; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 2
2447; SINK-AFTER-NEXT:    [[TMP5:%.*]] = mul i64 [[TMP4]], 25
2448; SINK-AFTER-NEXT:    [[NEXT_GEP3:%.*]] = getelementptr double, double* [[B]], i64 [[TMP5]]
2449; SINK-AFTER-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 3
2450; SINK-AFTER-NEXT:    [[TMP7:%.*]] = mul i64 [[TMP6]], 25
2451; SINK-AFTER-NEXT:    [[NEXT_GEP4:%.*]] = getelementptr double, double* [[B]], i64 [[TMP7]]
2452; SINK-AFTER-NEXT:    [[TMP8:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP]], i64 [[IDXPROM]]
2453; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP2]], i64 [[IDXPROM]]
2454; SINK-AFTER-NEXT:    [[TMP10:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP3]], i64 [[IDXPROM]]
2455; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds double, double* [[NEXT_GEP4]], i64 [[IDXPROM]]
2456; SINK-AFTER-NEXT:    [[TMP12:%.*]] = load double, double* [[TMP8]], align 8
2457; SINK-AFTER-NEXT:    [[TMP13:%.*]] = load double, double* [[TMP9]], align 8
2458; SINK-AFTER-NEXT:    [[TMP14:%.*]] = load double, double* [[TMP10]], align 8
2459; SINK-AFTER-NEXT:    [[TMP15:%.*]] = load double, double* [[TMP11]], align 8
2460; SINK-AFTER-NEXT:    [[TMP16:%.*]] = insertelement <4 x double> poison, double [[TMP12]], i32 0
2461; SINK-AFTER-NEXT:    [[TMP17:%.*]] = insertelement <4 x double> [[TMP16]], double [[TMP13]], i32 1
2462; SINK-AFTER-NEXT:    [[TMP18:%.*]] = insertelement <4 x double> [[TMP17]], double [[TMP14]], i32 2
2463; SINK-AFTER-NEXT:    [[TMP19]] = insertelement <4 x double> [[TMP18]], double [[TMP15]], i32 3
2464; SINK-AFTER-NEXT:    [[TMP20:%.*]] = shufflevector <4 x double> [[VECTOR_RECUR]], <4 x double> [[TMP19]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2465; SINK-AFTER-NEXT:    [[TMP21:%.*]] = fmul <4 x double> [[TMP20]], [[TMP19]]
2466; SINK-AFTER-NEXT:    [[TMP22:%.*]] = fcmp une <4 x double> [[TMP21]], zeroinitializer
2467; SINK-AFTER-NEXT:    [[TMP23:%.*]] = zext <4 x i1> [[TMP22]] to <4 x i32>
2468; SINK-AFTER-NEXT:    [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]]
2469; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2470; SINK-AFTER-NEXT:    [[TMP25:%.*]] = icmp eq i64 [[INDEX_NEXT]], 10240
2471; SINK-AFTER-NEXT:    br i1 [[TMP25]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP14:![0-9]+]]
2472; SINK-AFTER:       middle.block:
2473; SINK-AFTER-NEXT:    [[TMP26:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
2474; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 10240, 10240
2475; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x double> [[TMP19]], i32 3
2476; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x double> [[TMP19]], i32 2
2477; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_COND_CLEANUP:%.*]], label [[SCALAR_PH]]
2478; SINK-AFTER:       scalar.ph:
2479; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi double [ [[J]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2480; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi double* [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[B]], [[ENTRY]] ]
2481; SINK-AFTER-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ 10240, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2482; SINK-AFTER-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[ENTRY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
2483; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
2484; SINK-AFTER:       for.cond.cleanup:
2485; SINK-AFTER-NEXT:    [[A_1_LCSSA:%.*]] = phi i32 [ [[A_1:%.*]], [[FOR_BODY]] ], [ [[TMP26]], [[MIDDLE_BLOCK]] ]
2486; SINK-AFTER-NEXT:    ret i32 [[A_1_LCSSA]]
2487; SINK-AFTER:       for.body:
2488; SINK-AFTER-NEXT:    [[B_ADDR_012:%.*]] = phi double* [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[ADD_PTR:%.*]], [[FOR_BODY]] ]
2489; SINK-AFTER-NEXT:    [[I_011:%.*]] = phi i32 [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[INC1:%.*]], [[FOR_BODY]] ]
2490; SINK-AFTER-NEXT:    [[A_010:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[A_1]], [[FOR_BODY]] ]
2491; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi double [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ]
2492; SINK-AFTER-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 [[IDXPROM]]
2493; SINK-AFTER-NEXT:    [[TMP27]] = load double, double* [[ARRAYIDX]], align 8
2494; SINK-AFTER-NEXT:    [[MUL:%.*]] = fmul double [[SCALAR_RECUR]], [[TMP27]]
2495; SINK-AFTER-NEXT:    [[TOBOOL:%.*]] = fcmp une double [[MUL]], 0.000000e+00
2496; SINK-AFTER-NEXT:    [[INC:%.*]] = zext i1 [[TOBOOL]] to i32
2497; SINK-AFTER-NEXT:    [[A_1]] = add nsw i32 [[A_010]], [[INC]]
2498; SINK-AFTER-NEXT:    [[INC1]] = add nuw nsw i32 [[I_011]], 1
2499; SINK-AFTER-NEXT:    [[ADD_PTR]] = getelementptr inbounds double, double* [[B_ADDR_012]], i64 25
2500; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INC1]], 10240
2501; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_COND_CLEANUP]], label [[FOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2502;
2503entry:
2504  %idxprom = sext i32 %d to i64
2505  br label %for.body
2506
2507for.cond.cleanup:
2508  %a.1.lcssa = phi i32 [ %a.1, %for.body ]
2509  ret i32 %a.1.lcssa
2510
2511for.body:
2512  %b.addr.012 = phi double* [ %b, %entry ], [ %add.ptr, %for.body ]
2513  %i.011 = phi i32 [ 0, %entry ], [ %inc1, %for.body ]
2514  %a.010 = phi i32 [ 0, %entry ], [ %a.1, %for.body ]
2515  %j.addr.09 = phi double [ %j, %entry ], [ %0, %for.body ]
2516  %arrayidx = getelementptr inbounds double, double* %b.addr.012, i64 %idxprom
2517  %0 = load double, double* %arrayidx, align 8
2518  %mul = fmul double %j.addr.09, %0
2519  %tobool = fcmp une double %mul, 0.000000e+00
2520  %inc = zext i1 %tobool to i32
2521  %a.1 = add nsw i32 %a.010, %inc
2522  %inc1 = add nuw nsw i32 %i.011, 1
2523  %add.ptr = getelementptr inbounds double, double* %b.addr.012, i64 25
2524  %exitcond = icmp eq i32 %inc1, 10240
2525  br i1 %exitcond, label %for.cond.cleanup, label %for.body
2526}
2527
2528; void sink_after(short *a, int n, int *b) {
2529;   for(int i = 0; i < n; i++)
2530;     b[i] = (a[i] * a[i + 1]);
2531; }
2532;
2533; Check that the sext sank after the load in the vector loop.
2534;
2535define void @sink_after(i16* noalias %a, i32* noalias %b, i64 %n) {
2536; CHECK-LABEL: @sink_after(
2537; CHECK-NEXT:  entry:
2538; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
2539; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
2540; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2541; CHECK:       vector.ph:
2542; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -4
2543; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
2544; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2545; CHECK:       vector.body:
2546; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2547; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
2548; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
2549; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]]
2550; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <4 x i16>*
2551; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2
2552; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2553; CHECK-NEXT:    [[TMP4:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
2554; CHECK-NEXT:    [[TMP5:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
2555; CHECK-NEXT:    [[TMP6:%.*]] = mul nsw <4 x i32> [[TMP5]], [[TMP4]]
2556; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
2557; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
2558; CHECK-NEXT:    store <4 x i32> [[TMP6]], <4 x i32>* [[TMP8]], align 4
2559; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2560; CHECK-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2561; CHECK-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
2562; CHECK:       middle.block:
2563; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
2564; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
2565; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2566; CHECK:       scalar.ph:
2567; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2568; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
2569; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
2570; CHECK:       for.body:
2571; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP10:%.*]], [[FOR_BODY]] ]
2572; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
2573; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
2574; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
2575; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
2576; CHECK-NEXT:    [[TMP10]] = load i16, i16* [[ARRAYIDX2]], align 2
2577; CHECK-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP10]] to i32
2578; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
2579; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
2580; CHECK-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
2581; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
2582; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
2583; CHECK:       for.end:
2584; CHECK-NEXT:    ret void
2585;
2586; UNROLL-LABEL: @sink_after(
2587; UNROLL-NEXT:  entry:
2588; UNROLL-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
2589; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
2590; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2591; UNROLL:       vector.ph:
2592; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -8
2593; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
2594; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
2595; UNROLL:       vector.body:
2596; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2597; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
2598; UNROLL-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
2599; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]]
2600; UNROLL-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <4 x i16>*
2601; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2
2602; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i64 4
2603; UNROLL-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>*
2604; UNROLL-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2
2605; UNROLL-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2606; UNROLL-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2607; UNROLL-NEXT:    [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
2608; UNROLL-NEXT:    [[TMP8:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
2609; UNROLL-NEXT:    [[TMP9:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
2610; UNROLL-NEXT:    [[TMP10:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32>
2611; UNROLL-NEXT:    [[TMP11:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP7]]
2612; UNROLL-NEXT:    [[TMP12:%.*]] = mul nsw <4 x i32> [[TMP10]], [[TMP8]]
2613; UNROLL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
2614; UNROLL-NEXT:    [[TMP14:%.*]] = bitcast i32* [[TMP13]] to <4 x i32>*
2615; UNROLL-NEXT:    store <4 x i32> [[TMP11]], <4 x i32>* [[TMP14]], align 4
2616; UNROLL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 4
2617; UNROLL-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
2618; UNROLL-NEXT:    store <4 x i32> [[TMP12]], <4 x i32>* [[TMP16]], align 4
2619; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2620; UNROLL-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2621; UNROLL-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
2622; UNROLL:       middle.block:
2623; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
2624; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 3
2625; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2626; UNROLL:       scalar.ph:
2627; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2628; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
2629; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
2630; UNROLL:       for.body:
2631; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP18:%.*]], [[FOR_BODY]] ]
2632; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
2633; UNROLL-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
2634; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
2635; UNROLL-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
2636; UNROLL-NEXT:    [[TMP18]] = load i16, i16* [[ARRAYIDX2]], align 2
2637; UNROLL-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP18]] to i32
2638; UNROLL-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
2639; UNROLL-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
2640; UNROLL-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
2641; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
2642; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
2643; UNROLL:       for.end:
2644; UNROLL-NEXT:    ret void
2645;
2646; UNROLL-NO-IC-LABEL: @sink_after(
2647; UNROLL-NO-IC-NEXT:  entry:
2648; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
2649; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
2650; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2651; UNROLL-NO-IC:       vector.ph:
2652; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
2653; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
2654; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
2655; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
2656; UNROLL-NO-IC:       vector.body:
2657; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2658; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
2659; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2660; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
2661; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP0]], 1
2662; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1
2663; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
2664; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
2665; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0
2666; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>*
2667; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2
2668; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 4
2669; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <4 x i16>*
2670; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP9]], align 2
2671; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2672; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2673; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sext <4 x i16> [[TMP10]] to <4 x i32>
2674; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32>
2675; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
2676; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32>
2677; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = mul nsw <4 x i32> [[TMP14]], [[TMP12]]
2678; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP13]]
2679; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
2680; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
2681; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP18]], i32 0
2682; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>*
2683; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP16]], <4 x i32>* [[TMP21]], align 4
2684; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP18]], i32 4
2685; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>*
2686; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP17]], <4 x i32>* [[TMP23]], align 4
2687; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2688; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2689; UNROLL-NO-IC-NEXT:    br i1 [[TMP24]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
2690; UNROLL-NO-IC:       middle.block:
2691; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
2692; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3
2693; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 2
2694; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2695; UNROLL-NO-IC:       scalar.ph:
2696; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2697; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2698; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
2699; UNROLL-NO-IC:       for.body:
2700; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP25:%.*]], [[FOR_BODY]] ]
2701; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
2702; UNROLL-NO-IC-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
2703; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
2704; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
2705; UNROLL-NO-IC-NEXT:    [[TMP25]] = load i16, i16* [[ARRAYIDX2]], align 2
2706; UNROLL-NO-IC-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP25]] to i32
2707; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
2708; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
2709; UNROLL-NO-IC-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
2710; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
2711; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
2712; UNROLL-NO-IC:       for.end:
2713; UNROLL-NO-IC-NEXT:    ret void
2714;
2715; UNROLL-NO-VF-LABEL: @sink_after(
2716; UNROLL-NO-VF-NEXT:  entry:
2717; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
2718; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
2719; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2720; UNROLL-NO-VF:       vector.ph:
2721; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
2722; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
2723; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
2724; UNROLL-NO-VF:       vector.body:
2725; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2726; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
2727; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
2728; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
2729; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add nuw nsw i64 [[INDUCTION]], 1
2730; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[INDUCTION1]], 1
2731; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]]
2732; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]]
2733; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2
2734; UNROLL-NO-VF-NEXT:    [[TMP5]] = load i16, i16* [[TMP3]], align 2
2735; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
2736; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP4]] to i32
2737; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sext i16 [[TMP4]] to i32
2738; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = sext i16 [[TMP5]] to i32
2739; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = mul nsw i32 [[TMP8]], [[TMP6]]
2740; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = mul nsw i32 [[TMP9]], [[TMP7]]
2741; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDUCTION]]
2742; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION1]]
2743; UNROLL-NO-VF-NEXT:    store i32 [[TMP10]], i32* [[TMP12]], align 4
2744; UNROLL-NO-VF-NEXT:    store i32 [[TMP11]], i32* [[TMP13]], align 4
2745; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
2746; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2747; UNROLL-NO-VF-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP15:![0-9]+]]
2748; UNROLL-NO-VF:       middle.block:
2749; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
2750; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2751; UNROLL-NO-VF:       scalar.ph:
2752; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
2753; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2754; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
2755; UNROLL-NO-VF:       for.body:
2756; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ]
2757; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
2758; UNROLL-NO-VF-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
2759; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
2760; UNROLL-NO-VF-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
2761; UNROLL-NO-VF-NEXT:    [[TMP15]] = load i16, i16* [[ARRAYIDX2]], align 2
2762; UNROLL-NO-VF-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP15]] to i32
2763; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
2764; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
2765; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
2766; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
2767; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
2768; UNROLL-NO-VF:       for.end:
2769; UNROLL-NO-VF-NEXT:    ret void
2770;
2771; SINK-AFTER-LABEL: @sink_after(
2772; SINK-AFTER-NEXT:  entry:
2773; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
2774; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
2775; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2776; SINK-AFTER:       vector.ph:
2777; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
2778; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
2779; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
2780; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
2781; SINK-AFTER:       vector.body:
2782; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2783; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
2784; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
2785; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
2786; SINK-AFTER-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]]
2787; SINK-AFTER-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[TMP2]], i32 0
2788; SINK-AFTER-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>*
2789; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2
2790; SINK-AFTER-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2791; SINK-AFTER-NEXT:    [[TMP6:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
2792; SINK-AFTER-NEXT:    [[TMP7:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
2793; SINK-AFTER-NEXT:    [[TMP8:%.*]] = mul nsw <4 x i32> [[TMP7]], [[TMP6]]
2794; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
2795; SINK-AFTER-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[TMP9]], i32 0
2796; SINK-AFTER-NEXT:    [[TMP11:%.*]] = bitcast i32* [[TMP10]] to <4 x i32>*
2797; SINK-AFTER-NEXT:    store <4 x i32> [[TMP8]], <4 x i32>* [[TMP11]], align 4
2798; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2799; SINK-AFTER-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2800; SINK-AFTER-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP16:![0-9]+]]
2801; SINK-AFTER:       middle.block:
2802; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
2803; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
2804; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
2805; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2806; SINK-AFTER:       scalar.ph:
2807; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
2808; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
2809; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
2810; SINK-AFTER:       for.body:
2811; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP13:%.*]], [[FOR_BODY]] ]
2812; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
2813; SINK-AFTER-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
2814; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
2815; SINK-AFTER-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
2816; SINK-AFTER-NEXT:    [[TMP13]] = load i16, i16* [[ARRAYIDX2]], align 2
2817; SINK-AFTER-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP13]] to i32
2818; SINK-AFTER-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
2819; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
2820; SINK-AFTER-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
2821; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
2822; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
2823; SINK-AFTER:       for.end:
2824; SINK-AFTER-NEXT:    ret void
2825;
2826entry:
2827  %.pre = load i16, i16* %a
2828  br label %for.body
2829
2830for.body:
2831  %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
2832  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
2833  %conv = sext i16 %0 to i32
2834  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
2835  %arrayidx2 = getelementptr inbounds i16, i16* %a, i64 %indvars.iv.next
2836  %1 = load i16, i16* %arrayidx2
2837  %conv3 = sext i16 %1 to i32
2838  %mul = mul nsw i32 %conv3, %conv
2839  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
2840  store i32 %mul, i32* %arrayidx5
2841  %exitcond = icmp eq i64 %indvars.iv.next, %n
2842  br i1 %exitcond, label %for.end, label %for.body
2843
2844for.end:
2845  ret void
2846}
2847
2848; PR34711: given three consecutive instructions such that the first will be
2849; widened, the second is a cast that will be widened and needs to sink after the
2850; third, and the third is a first-order-recurring load that will be replicated
2851; instead of widened. Although the cast and the first instruction will both be
2852; widened, and are originally adjacent to each other, make sure the replicated
2853; load ends up appearing between them.
2854;
2855; void PR34711(short[2] *a, int *b, int *c, int n) {
2856;   for(int i = 0; i < n; i++) {
2857;     c[i] = 7;
2858;     b[i] = (a[i][0] * a[i][1]);
2859;   }
2860; }
2861;
2862; Check that the sext sank after the load in the vector loop.
2863;
2864define void @PR34711([2 x i16]* noalias %a, i32* noalias %b, i32* noalias %c, i64 %n) {
2865; CHECK-LABEL: @PR34711(
2866; CHECK-NEXT:  entry:
2867; CHECK-NEXT:    [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
2868; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
2869; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
2870; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2871; CHECK:       vector.ph:
2872; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -4
2873; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
2874; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
2875; CHECK:       vector.body:
2876; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2877; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ]
2878; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
2879; CHECK-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
2880; CHECK-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
2881; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[INDEX]]
2882; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDEX]], i64 1
2883; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1
2884; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1
2885; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1
2886; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
2887; CHECK-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP8]], align 4
2888; CHECK-NEXT:    [[TMP9:%.*]] = load i16, i16* [[TMP4]], align 2
2889; CHECK-NEXT:    [[TMP10:%.*]] = load i16, i16* [[TMP5]], align 2
2890; CHECK-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP6]], align 2
2891; CHECK-NEXT:    [[TMP12:%.*]] = load i16, i16* [[TMP7]], align 2
2892; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <4 x i16> poison, i16 [[TMP9]], i64 0
2893; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i16> [[TMP13]], i16 [[TMP10]], i64 1
2894; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x i16> [[TMP14]], i16 [[TMP11]], i64 2
2895; CHECK-NEXT:    [[TMP16]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP12]], i64 3
2896; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP15]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2897; CHECK-NEXT:    [[TMP18:%.*]] = sext <4 x i16> [[TMP17]] to <4 x i32>
2898; CHECK-NEXT:    [[TMP19:%.*]] = sext <4 x i16> [[TMP16]] to <4 x i32>
2899; CHECK-NEXT:    [[TMP20:%.*]] = mul nsw <4 x i32> [[TMP19]], [[TMP18]]
2900; CHECK-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
2901; CHECK-NEXT:    [[TMP22:%.*]] = bitcast i32* [[TMP21]] to <4 x i32>*
2902; CHECK-NEXT:    store <4 x i32> [[TMP20]], <4 x i32>* [[TMP22]], align 4
2903; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
2904; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2905; CHECK-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
2906; CHECK:       middle.block:
2907; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
2908; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
2909; CHECK:       scalar.ph:
2910; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ]
2911; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
2912; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
2913; CHECK:       for.body:
2914; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP24:%.*]], [[FOR_BODY]] ]
2915; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
2916; CHECK-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
2917; CHECK-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
2918; CHECK-NEXT:    store i32 7, i32* [[ARRAYCIDX]], align 4
2919; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
2920; CHECK-NEXT:    [[TMP24]] = load i16, i16* [[CUR_INDEX]], align 2
2921; CHECK-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP24]] to i32
2922; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
2923; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
2924; CHECK-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
2925; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
2926; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
2927; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
2928; CHECK:       for.end:
2929; CHECK-NEXT:    ret void
2930;
2931; UNROLL-LABEL: @PR34711(
2932; UNROLL-NEXT:  entry:
2933; UNROLL-NEXT:    [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
2934; UNROLL-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
2935; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
2936; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
2937; UNROLL:       vector.ph:
2938; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -8
2939; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
2940; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
2941; UNROLL:       vector.body:
2942; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
2943; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP34:%.*]], [[VECTOR_BODY]] ]
2944; UNROLL-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
2945; UNROLL-NEXT:    [[TMP1:%.*]] = or i64 [[INDEX]], 2
2946; UNROLL-NEXT:    [[TMP2:%.*]] = or i64 [[INDEX]], 3
2947; UNROLL-NEXT:    [[TMP3:%.*]] = or i64 [[INDEX]], 4
2948; UNROLL-NEXT:    [[TMP4:%.*]] = or i64 [[INDEX]], 5
2949; UNROLL-NEXT:    [[TMP5:%.*]] = or i64 [[INDEX]], 6
2950; UNROLL-NEXT:    [[TMP6:%.*]] = or i64 [[INDEX]], 7
2951; UNROLL-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[INDEX]]
2952; UNROLL-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDEX]], i64 1
2953; UNROLL-NEXT:    [[TMP9:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1
2954; UNROLL-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1
2955; UNROLL-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1
2956; UNROLL-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1
2957; UNROLL-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1
2958; UNROLL-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1
2959; UNROLL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1
2960; UNROLL-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
2961; UNROLL-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP16]], align 4
2962; UNROLL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP7]], i64 4
2963; UNROLL-NEXT:    [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>*
2964; UNROLL-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP18]], align 4
2965; UNROLL-NEXT:    [[TMP19:%.*]] = load i16, i16* [[TMP8]], align 2
2966; UNROLL-NEXT:    [[TMP20:%.*]] = load i16, i16* [[TMP9]], align 2
2967; UNROLL-NEXT:    [[TMP21:%.*]] = load i16, i16* [[TMP10]], align 2
2968; UNROLL-NEXT:    [[TMP22:%.*]] = load i16, i16* [[TMP11]], align 2
2969; UNROLL-NEXT:    [[TMP23:%.*]] = insertelement <4 x i16> poison, i16 [[TMP19]], i64 0
2970; UNROLL-NEXT:    [[TMP24:%.*]] = insertelement <4 x i16> [[TMP23]], i16 [[TMP20]], i64 1
2971; UNROLL-NEXT:    [[TMP25:%.*]] = insertelement <4 x i16> [[TMP24]], i16 [[TMP21]], i64 2
2972; UNROLL-NEXT:    [[TMP26:%.*]] = insertelement <4 x i16> [[TMP25]], i16 [[TMP22]], i64 3
2973; UNROLL-NEXT:    [[TMP27:%.*]] = load i16, i16* [[TMP12]], align 2
2974; UNROLL-NEXT:    [[TMP28:%.*]] = load i16, i16* [[TMP13]], align 2
2975; UNROLL-NEXT:    [[TMP29:%.*]] = load i16, i16* [[TMP14]], align 2
2976; UNROLL-NEXT:    [[TMP30:%.*]] = load i16, i16* [[TMP15]], align 2
2977; UNROLL-NEXT:    [[TMP31:%.*]] = insertelement <4 x i16> poison, i16 [[TMP27]], i64 0
2978; UNROLL-NEXT:    [[TMP32:%.*]] = insertelement <4 x i16> [[TMP31]], i16 [[TMP28]], i64 1
2979; UNROLL-NEXT:    [[TMP33:%.*]] = insertelement <4 x i16> [[TMP32]], i16 [[TMP29]], i64 2
2980; UNROLL-NEXT:    [[TMP34]] = insertelement <4 x i16> [[TMP33]], i16 [[TMP30]], i64 3
2981; UNROLL-NEXT:    [[TMP35:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP25]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2982; UNROLL-NEXT:    [[TMP36:%.*]] = shufflevector <4 x i16> [[TMP26]], <4 x i16> [[TMP33]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
2983; UNROLL-NEXT:    [[TMP37:%.*]] = sext <4 x i16> [[TMP35]] to <4 x i32>
2984; UNROLL-NEXT:    [[TMP38:%.*]] = sext <4 x i16> [[TMP36]] to <4 x i32>
2985; UNROLL-NEXT:    [[TMP39:%.*]] = sext <4 x i16> [[TMP26]] to <4 x i32>
2986; UNROLL-NEXT:    [[TMP40:%.*]] = sext <4 x i16> [[TMP34]] to <4 x i32>
2987; UNROLL-NEXT:    [[TMP41:%.*]] = mul nsw <4 x i32> [[TMP39]], [[TMP37]]
2988; UNROLL-NEXT:    [[TMP42:%.*]] = mul nsw <4 x i32> [[TMP40]], [[TMP38]]
2989; UNROLL-NEXT:    [[TMP43:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
2990; UNROLL-NEXT:    [[TMP44:%.*]] = bitcast i32* [[TMP43]] to <4 x i32>*
2991; UNROLL-NEXT:    store <4 x i32> [[TMP41]], <4 x i32>* [[TMP44]], align 4
2992; UNROLL-NEXT:    [[TMP45:%.*]] = getelementptr inbounds i32, i32* [[TMP43]], i64 4
2993; UNROLL-NEXT:    [[TMP46:%.*]] = bitcast i32* [[TMP45]] to <4 x i32>*
2994; UNROLL-NEXT:    store <4 x i32> [[TMP42]], <4 x i32>* [[TMP46]], align 4
2995; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
2996; UNROLL-NEXT:    [[TMP47:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
2997; UNROLL-NEXT:    br i1 [[TMP47]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
2998; UNROLL:       middle.block:
2999; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
3000; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3001; UNROLL:       scalar.ph:
3002; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP30]], [[MIDDLE_BLOCK]] ]
3003; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
3004; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
3005; UNROLL:       for.body:
3006; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP48:%.*]], [[FOR_BODY]] ]
3007; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3008; UNROLL-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
3009; UNROLL-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
3010; UNROLL-NEXT:    store i32 7, i32* [[ARRAYCIDX]], align 4
3011; UNROLL-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3012; UNROLL-NEXT:    [[TMP48]] = load i16, i16* [[CUR_INDEX]], align 2
3013; UNROLL-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP48]] to i32
3014; UNROLL-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
3015; UNROLL-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3016; UNROLL-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3017; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3018; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3019; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
3020; UNROLL:       for.end:
3021; UNROLL-NEXT:    ret void
3022;
3023; UNROLL-NO-IC-LABEL: @PR34711(
3024; UNROLL-NO-IC-NEXT:  entry:
3025; UNROLL-NO-IC-NEXT:    [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
3026; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
3027; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
3028; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3029; UNROLL-NO-IC:       vector.ph:
3030; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
3031; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3032; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
3033; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
3034; UNROLL-NO-IC:       vector.body:
3035; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3036; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP37:%.*]], [[VECTOR_BODY]] ]
3037; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
3038; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
3039; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
3040; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
3041; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i64 [[INDEX]], 4
3042; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i64 [[INDEX]], 5
3043; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i64 [[INDEX]], 6
3044; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add i64 [[INDEX]], 7
3045; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP0]]
3046; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[TMP4]]
3047; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1
3048; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1
3049; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1
3050; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1
3051; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP4]], i64 1
3052; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP5]], i64 1
3053; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP6]], i64 1
3054; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP7]], i64 1
3055; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 0
3056; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = bitcast i32* [[TMP18]] to <4 x i32>*
3057; UNROLL-NO-IC-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP19]], align 4
3058; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[TMP8]], i32 4
3059; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = bitcast i32* [[TMP20]] to <4 x i32>*
3060; UNROLL-NO-IC-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP21]], align 4
3061; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = load i16, i16* [[TMP10]], align 2
3062; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = load i16, i16* [[TMP11]], align 2
3063; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = load i16, i16* [[TMP12]], align 2
3064; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = load i16, i16* [[TMP13]], align 2
3065; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = insertelement <4 x i16> poison, i16 [[TMP22]], i32 0
3066; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = insertelement <4 x i16> [[TMP26]], i16 [[TMP23]], i32 1
3067; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = insertelement <4 x i16> [[TMP27]], i16 [[TMP24]], i32 2
3068; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = insertelement <4 x i16> [[TMP28]], i16 [[TMP25]], i32 3
3069; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = load i16, i16* [[TMP14]], align 2
3070; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = load i16, i16* [[TMP15]], align 2
3071; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = load i16, i16* [[TMP16]], align 2
3072; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = load i16, i16* [[TMP17]], align 2
3073; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = insertelement <4 x i16> poison, i16 [[TMP30]], i32 0
3074; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = insertelement <4 x i16> [[TMP34]], i16 [[TMP31]], i32 1
3075; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = insertelement <4 x i16> [[TMP35]], i16 [[TMP32]], i32 2
3076; UNROLL-NO-IC-NEXT:    [[TMP37]] = insertelement <4 x i16> [[TMP36]], i16 [[TMP33]], i32 3
3077; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP29]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3078; UNROLL-NO-IC-NEXT:    [[TMP39:%.*]] = shufflevector <4 x i16> [[TMP29]], <4 x i16> [[TMP37]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3079; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = sext <4 x i16> [[TMP38]] to <4 x i32>
3080; UNROLL-NO-IC-NEXT:    [[TMP41:%.*]] = sext <4 x i16> [[TMP39]] to <4 x i32>
3081; UNROLL-NO-IC-NEXT:    [[TMP42:%.*]] = sext <4 x i16> [[TMP29]] to <4 x i32>
3082; UNROLL-NO-IC-NEXT:    [[TMP43:%.*]] = sext <4 x i16> [[TMP37]] to <4 x i32>
3083; UNROLL-NO-IC-NEXT:    [[TMP44:%.*]] = mul nsw <4 x i32> [[TMP42]], [[TMP40]]
3084; UNROLL-NO-IC-NEXT:    [[TMP45:%.*]] = mul nsw <4 x i32> [[TMP43]], [[TMP41]]
3085; UNROLL-NO-IC-NEXT:    [[TMP46:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
3086; UNROLL-NO-IC-NEXT:    [[TMP47:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]]
3087; UNROLL-NO-IC-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[TMP46]], i32 0
3088; UNROLL-NO-IC-NEXT:    [[TMP49:%.*]] = bitcast i32* [[TMP48]] to <4 x i32>*
3089; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP44]], <4 x i32>* [[TMP49]], align 4
3090; UNROLL-NO-IC-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i32, i32* [[TMP46]], i32 4
3091; UNROLL-NO-IC-NEXT:    [[TMP51:%.*]] = bitcast i32* [[TMP50]] to <4 x i32>*
3092; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP45]], <4 x i32>* [[TMP51]], align 4
3093; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
3094; UNROLL-NO-IC-NEXT:    [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3095; UNROLL-NO-IC-NEXT:    br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
3096; UNROLL-NO-IC:       middle.block:
3097; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3098; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP37]], i32 3
3099; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP37]], i32 2
3100; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3101; UNROLL-NO-IC:       scalar.ph:
3102; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3103; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
3104; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
3105; UNROLL-NO-IC:       for.body:
3106; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP53:%.*]], [[FOR_BODY]] ]
3107; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3108; UNROLL-NO-IC-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
3109; UNROLL-NO-IC-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
3110; UNROLL-NO-IC-NEXT:    store i32 7, i32* [[ARRAYCIDX]], align 4
3111; UNROLL-NO-IC-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3112; UNROLL-NO-IC-NEXT:    [[TMP53]] = load i16, i16* [[CUR_INDEX]], align 2
3113; UNROLL-NO-IC-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP53]] to i32
3114; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
3115; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3116; UNROLL-NO-IC-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3117; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3118; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3119; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
3120; UNROLL-NO-IC:       for.end:
3121; UNROLL-NO-IC-NEXT:    ret void
3122;
3123; UNROLL-NO-VF-LABEL: @PR34711(
3124; UNROLL-NO-VF-NEXT:  entry:
3125; UNROLL-NO-VF-NEXT:    [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
3126; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
3127; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
3128; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3129; UNROLL-NO-VF:       vector.ph:
3130; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
3131; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3132; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
3133; UNROLL-NO-VF:       vector.body:
3134; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3135; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
3136; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
3137; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
3138; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[INDUCTION]]
3139; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDUCTION1]]
3140; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION]], i64 1
3141; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDUCTION1]], i64 1
3142; UNROLL-NO-VF-NEXT:    store i32 7, i32* [[TMP0]], align 4
3143; UNROLL-NO-VF-NEXT:    store i32 7, i32* [[TMP1]], align 4
3144; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2
3145; UNROLL-NO-VF-NEXT:    [[TMP5]] = load i16, i16* [[TMP3]], align 2
3146; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
3147; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP4]] to i32
3148; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = sext i16 [[TMP4]] to i32
3149; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = sext i16 [[TMP5]] to i32
3150; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = mul nsw i32 [[TMP8]], [[TMP6]]
3151; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = mul nsw i32 [[TMP9]], [[TMP7]]
3152; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDUCTION]]
3153; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION1]]
3154; UNROLL-NO-VF-NEXT:    store i32 [[TMP10]], i32* [[TMP12]], align 4
3155; UNROLL-NO-VF-NEXT:    store i32 [[TMP11]], i32* [[TMP13]], align 4
3156; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
3157; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3158; UNROLL-NO-VF-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP17:![0-9]+]]
3159; UNROLL-NO-VF:       middle.block:
3160; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3161; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3162; UNROLL-NO-VF:       scalar.ph:
3163; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
3164; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
3165; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
3166; UNROLL-NO-VF:       for.body:
3167; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP15:%.*]], [[FOR_BODY]] ]
3168; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3169; UNROLL-NO-VF-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
3170; UNROLL-NO-VF-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
3171; UNROLL-NO-VF-NEXT:    store i32 7, i32* [[ARRAYCIDX]], align 4
3172; UNROLL-NO-VF-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3173; UNROLL-NO-VF-NEXT:    [[TMP15]] = load i16, i16* [[CUR_INDEX]], align 2
3174; UNROLL-NO-VF-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP15]] to i32
3175; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
3176; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3177; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3178; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3179; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3180; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
3181; UNROLL-NO-VF:       for.end:
3182; UNROLL-NO-VF-NEXT:    ret void
3183;
3184; SINK-AFTER-LABEL: @PR34711(
3185; SINK-AFTER-NEXT:  entry:
3186; SINK-AFTER-NEXT:    [[PRE_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 0
3187; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[PRE_INDEX]], align 2
3188; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
3189; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3190; SINK-AFTER:       vector.ph:
3191; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
3192; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3193; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
3194; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
3195; SINK-AFTER:       vector.body:
3196; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3197; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ]
3198; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
3199; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 1
3200; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add i64 [[INDEX]], 2
3201; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i64 [[INDEX]], 3
3202; SINK-AFTER-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[C:%.*]], i64 [[TMP0]]
3203; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP0]], i64 1
3204; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP1]], i64 1
3205; SINK-AFTER-NEXT:    [[TMP7:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP2]], i64 1
3206; SINK-AFTER-NEXT:    [[TMP8:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[TMP3]], i64 1
3207; SINK-AFTER-NEXT:    [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[TMP4]], i32 0
3208; SINK-AFTER-NEXT:    [[TMP10:%.*]] = bitcast i32* [[TMP9]] to <4 x i32>*
3209; SINK-AFTER-NEXT:    store <4 x i32> <i32 7, i32 7, i32 7, i32 7>, <4 x i32>* [[TMP10]], align 4
3210; SINK-AFTER-NEXT:    [[TMP11:%.*]] = load i16, i16* [[TMP5]], align 2
3211; SINK-AFTER-NEXT:    [[TMP12:%.*]] = load i16, i16* [[TMP6]], align 2
3212; SINK-AFTER-NEXT:    [[TMP13:%.*]] = load i16, i16* [[TMP7]], align 2
3213; SINK-AFTER-NEXT:    [[TMP14:%.*]] = load i16, i16* [[TMP8]], align 2
3214; SINK-AFTER-NEXT:    [[TMP15:%.*]] = insertelement <4 x i16> poison, i16 [[TMP11]], i32 0
3215; SINK-AFTER-NEXT:    [[TMP16:%.*]] = insertelement <4 x i16> [[TMP15]], i16 [[TMP12]], i32 1
3216; SINK-AFTER-NEXT:    [[TMP17:%.*]] = insertelement <4 x i16> [[TMP16]], i16 [[TMP13]], i32 2
3217; SINK-AFTER-NEXT:    [[TMP18]] = insertelement <4 x i16> [[TMP17]], i16 [[TMP14]], i32 3
3218; SINK-AFTER-NEXT:    [[TMP19:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP18]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3219; SINK-AFTER-NEXT:    [[TMP20:%.*]] = sext <4 x i16> [[TMP19]] to <4 x i32>
3220; SINK-AFTER-NEXT:    [[TMP21:%.*]] = sext <4 x i16> [[TMP18]] to <4 x i32>
3221; SINK-AFTER-NEXT:    [[TMP22:%.*]] = mul nsw <4 x i32> [[TMP21]], [[TMP20]]
3222; SINK-AFTER-NEXT:    [[TMP23:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
3223; SINK-AFTER-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 0
3224; SINK-AFTER-NEXT:    [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>*
3225; SINK-AFTER-NEXT:    store <4 x i32> [[TMP22]], <4 x i32>* [[TMP25]], align 4
3226; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
3227; SINK-AFTER-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3228; SINK-AFTER-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP18:![0-9]+]]
3229; SINK-AFTER:       middle.block:
3230; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3231; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP18]], i32 3
3232; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP18]], i32 2
3233; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3234; SINK-AFTER:       scalar.ph:
3235; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3236; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
3237; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
3238; SINK-AFTER:       for.body:
3239; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ]
3240; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3241; SINK-AFTER-NEXT:    [[ARRAYCIDX:%.*]] = getelementptr inbounds i32, i32* [[C]], i64 [[INDVARS_IV]]
3242; SINK-AFTER-NEXT:    [[CUR_INDEX:%.*]] = getelementptr inbounds [2 x i16], [2 x i16]* [[A]], i64 [[INDVARS_IV]], i64 1
3243; SINK-AFTER-NEXT:    store i32 7, i32* [[ARRAYCIDX]], align 4
3244; SINK-AFTER-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3245; SINK-AFTER-NEXT:    [[TMP27]] = load i16, i16* [[CUR_INDEX]], align 2
3246; SINK-AFTER-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP27]] to i32
3247; SINK-AFTER-NEXT:    [[MUL:%.*]] = mul nsw i32 [[CONV3]], [[CONV]]
3248; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3249; SINK-AFTER-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3250; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3251; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3252; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
3253; SINK-AFTER:       for.end:
3254; SINK-AFTER-NEXT:    ret void
3255;
3256
3257entry:
3258  %pre.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 0, i64 0
3259  %.pre = load i16, i16* %pre.index
3260  br label %for.body
3261
3262for.body:
3263  %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
3264  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
3265  %arraycidx = getelementptr inbounds i32, i32* %c, i64 %indvars.iv
3266  %cur.index = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 %indvars.iv, i64 1
3267  store i32 7, i32* %arraycidx   ; 1st instruction, to be widened.
3268  %conv = sext i16 %0 to i32     ; 2nd, cast to sink after third.
3269  %1 = load i16, i16* %cur.index ; 3rd, first-order-recurring load not widened.
3270  %conv3 = sext i16 %1 to i32
3271  %mul = mul nsw i32 %conv3, %conv
3272  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
3273  store i32 %mul, i32* %arrayidx5
3274  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
3275  %exitcond = icmp eq i64 %indvars.iv.next, %n
3276  br i1 %exitcond, label %for.end, label %for.body
3277
3278for.end:
3279  ret void
3280}
3281
3282; void no_sink_after(short *a, int n, int *b) {
3283;   for(int i = 0; i < n; i++)
3284;     b[i] = ((a[i] + 2) * a[i + 1]);
3285; }
3286;
3287
3288;
3289define void @sink_after_with_multiple_users(i16* noalias %a, i32* noalias %b, i64 %n) {
3290; CHECK-LABEL: @sink_after_with_multiple_users(
3291; CHECK-NEXT:  entry:
3292; CHECK-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
3293; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
3294; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3295; CHECK:       vector.ph:
3296; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -4
3297; CHECK-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
3298; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
3299; CHECK:       vector.body:
3300; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3301; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
3302; CHECK-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
3303; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]]
3304; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <4 x i16>*
3305; CHECK-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2
3306; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3307; CHECK-NEXT:    [[TMP4:%.*]] = sext <4 x i16> [[TMP3]] to <4 x i32>
3308; CHECK-NEXT:    [[TMP5:%.*]] = add nsw <4 x i32> [[TMP4]], <i32 2, i32 2, i32 2, i32 2>
3309; CHECK-NEXT:    [[TMP6:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
3310; CHECK-NEXT:    [[TMP7:%.*]] = mul nsw <4 x i32> [[TMP5]], [[TMP6]]
3311; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
3312; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i32* [[TMP8]] to <4 x i32>*
3313; CHECK-NEXT:    store <4 x i32> [[TMP7]], <4 x i32>* [[TMP9]], align 4
3314; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
3315; CHECK-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3316; CHECK-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
3317; CHECK:       middle.block:
3318; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
3319; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i64 3
3320; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3321; CHECK:       scalar.ph:
3322; CHECK-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3323; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
3324; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
3325; CHECK:       for.body:
3326; CHECK-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP11:%.*]], [[FOR_BODY]] ]
3327; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3328; CHECK-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3329; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 2
3330; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3331; CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
3332; CHECK-NEXT:    [[TMP11]] = load i16, i16* [[ARRAYIDX2]], align 2
3333; CHECK-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP11]] to i32
3334; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
3335; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3336; CHECK-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3337; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3338; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
3339; CHECK:       for.end:
3340; CHECK-NEXT:    ret void
3341;
3342; UNROLL-LABEL: @sink_after_with_multiple_users(
3343; UNROLL-NEXT:  entry:
3344; UNROLL-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
3345; UNROLL-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
3346; UNROLL-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3347; UNROLL:       vector.ph:
3348; UNROLL-NEXT:    [[N_VEC:%.*]] = and i64 [[N]], -8
3349; UNROLL-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i64 3
3350; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
3351; UNROLL:       vector.body:
3352; UNROLL-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3353; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
3354; UNROLL-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 1
3355; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]]
3356; UNROLL-NEXT:    [[TMP2:%.*]] = bitcast i16* [[TMP1]] to <4 x i16>*
3357; UNROLL-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP2]], align 2
3358; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[TMP1]], i64 4
3359; UNROLL-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>*
3360; UNROLL-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2
3361; UNROLL-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3362; UNROLL-NEXT:    [[TMP6:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3363; UNROLL-NEXT:    [[TMP7:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
3364; UNROLL-NEXT:    [[TMP8:%.*]] = sext <4 x i16> [[TMP6]] to <4 x i32>
3365; UNROLL-NEXT:    [[TMP9:%.*]] = add nsw <4 x i32> [[TMP7]], <i32 2, i32 2, i32 2, i32 2>
3366; UNROLL-NEXT:    [[TMP10:%.*]] = add nsw <4 x i32> [[TMP8]], <i32 2, i32 2, i32 2, i32 2>
3367; UNROLL-NEXT:    [[TMP11:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
3368; UNROLL-NEXT:    [[TMP12:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32>
3369; UNROLL-NEXT:    [[TMP13:%.*]] = mul nsw <4 x i32> [[TMP9]], [[TMP11]]
3370; UNROLL-NEXT:    [[TMP14:%.*]] = mul nsw <4 x i32> [[TMP10]], [[TMP12]]
3371; UNROLL-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
3372; UNROLL-NEXT:    [[TMP16:%.*]] = bitcast i32* [[TMP15]] to <4 x i32>*
3373; UNROLL-NEXT:    store <4 x i32> [[TMP13]], <4 x i32>* [[TMP16]], align 4
3374; UNROLL-NEXT:    [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[TMP15]], i64 4
3375; UNROLL-NEXT:    [[TMP18:%.*]] = bitcast i32* [[TMP17]] to <4 x i32>*
3376; UNROLL-NEXT:    store <4 x i32> [[TMP14]], <4 x i32>* [[TMP18]], align 4
3377; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
3378; UNROLL-NEXT:    [[TMP19:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3379; UNROLL-NEXT:    br i1 [[TMP19]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
3380; UNROLL:       middle.block:
3381; UNROLL-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]]
3382; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i64 3
3383; UNROLL-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3384; UNROLL:       scalar.ph:
3385; UNROLL-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3386; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ]
3387; UNROLL-NEXT:    br label [[FOR_BODY:%.*]]
3388; UNROLL:       for.body:
3389; UNROLL-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP20:%.*]], [[FOR_BODY]] ]
3390; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3391; UNROLL-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3392; UNROLL-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 2
3393; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3394; UNROLL-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
3395; UNROLL-NEXT:    [[TMP20]] = load i16, i16* [[ARRAYIDX2]], align 2
3396; UNROLL-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP20]] to i32
3397; UNROLL-NEXT:    [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
3398; UNROLL-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3399; UNROLL-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3400; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3401; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
3402; UNROLL:       for.end:
3403; UNROLL-NEXT:    ret void
3404;
3405; UNROLL-NO-IC-LABEL: @sink_after_with_multiple_users(
3406; UNROLL-NO-IC-NEXT:  entry:
3407; UNROLL-NO-IC-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
3408; UNROLL-NO-IC-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 8
3409; UNROLL-NO-IC-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3410; UNROLL-NO-IC:       vector.ph:
3411; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 8
3412; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3413; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
3414; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
3415; UNROLL-NO-IC:       vector.body:
3416; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3417; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD1:%.*]], [[VECTOR_BODY]] ]
3418; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
3419; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i64 [[INDEX]], 4
3420; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP0]], 1
3421; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP1]], 1
3422; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP2]]
3423; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP3]]
3424; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 0
3425; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = bitcast i16* [[TMP6]] to <4 x i16>*
3426; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i16>, <4 x i16>* [[TMP7]], align 2
3427; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i16, i16* [[TMP4]], i32 4
3428; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = bitcast i16* [[TMP8]] to <4 x i16>*
3429; UNROLL-NO-IC-NEXT:    [[WIDE_LOAD1]] = load <4 x i16>, <4 x i16>* [[TMP9]], align 2
3430; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3431; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = shufflevector <4 x i16> [[WIDE_LOAD]], <4 x i16> [[WIDE_LOAD1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3432; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = sext <4 x i16> [[TMP10]] to <4 x i32>
3433; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = sext <4 x i16> [[TMP11]] to <4 x i32>
3434; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = add nsw <4 x i32> [[TMP12]], <i32 2, i32 2, i32 2, i32 2>
3435; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = add nsw <4 x i32> [[TMP13]], <i32 2, i32 2, i32 2, i32 2>
3436; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
3437; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = sext <4 x i16> [[WIDE_LOAD1]] to <4 x i32>
3438; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = mul nsw <4 x i32> [[TMP14]], [[TMP16]]
3439; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = mul nsw <4 x i32> [[TMP15]], [[TMP17]]
3440; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
3441; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]]
3442; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 0
3443; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = bitcast i32* [[TMP22]] to <4 x i32>*
3444; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP18]], <4 x i32>* [[TMP23]], align 4
3445; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 4
3446; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = bitcast i32* [[TMP24]] to <4 x i32>*
3447; UNROLL-NO-IC-NEXT:    store <4 x i32> [[TMP19]], <4 x i32>* [[TMP25]], align 4
3448; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
3449; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3450; UNROLL-NO-IC-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
3451; UNROLL-NO-IC:       middle.block:
3452; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3453; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 3
3454; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD1]], i32 2
3455; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3456; UNROLL-NO-IC:       scalar.ph:
3457; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3458; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
3459; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY:%.*]]
3460; UNROLL-NO-IC:       for.body:
3461; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP27:%.*]], [[FOR_BODY]] ]
3462; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3463; UNROLL-NO-IC-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3464; UNROLL-NO-IC-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 2
3465; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3466; UNROLL-NO-IC-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
3467; UNROLL-NO-IC-NEXT:    [[TMP27]] = load i16, i16* [[ARRAYIDX2]], align 2
3468; UNROLL-NO-IC-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP27]] to i32
3469; UNROLL-NO-IC-NEXT:    [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
3470; UNROLL-NO-IC-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3471; UNROLL-NO-IC-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3472; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3473; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
3474; UNROLL-NO-IC:       for.end:
3475; UNROLL-NO-IC-NEXT:    ret void
3476;
3477; UNROLL-NO-VF-LABEL: @sink_after_with_multiple_users(
3478; UNROLL-NO-VF-NEXT:  entry:
3479; UNROLL-NO-VF-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
3480; UNROLL-NO-VF-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 2
3481; UNROLL-NO-VF-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3482; UNROLL-NO-VF:       vector.ph:
3483; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 2
3484; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3485; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
3486; UNROLL-NO-VF:       vector.body:
3487; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3488; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ [[DOTPRE]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
3489; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i64 [[INDEX]], 0
3490; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i64 [[INDEX]], 1
3491; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add nuw nsw i64 [[INDUCTION]], 1
3492; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[INDUCTION1]], 1
3493; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP0]]
3494; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]]
3495; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = load i16, i16* [[TMP2]], align 2
3496; UNROLL-NO-VF-NEXT:    [[TMP5]] = load i16, i16* [[TMP3]], align 2
3497; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = sext i16 [[VECTOR_RECUR]] to i32
3498; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = sext i16 [[TMP4]] to i32
3499; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = add nsw i32 [[TMP6]], 2
3500; UNROLL-NO-VF-NEXT:    [[TMP9:%.*]] = add nsw i32 [[TMP7]], 2
3501; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = sext i16 [[TMP4]] to i32
3502; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = sext i16 [[TMP5]] to i32
3503; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = mul nsw i32 [[TMP8]], [[TMP10]]
3504; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = mul nsw i32 [[TMP9]], [[TMP11]]
3505; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDUCTION]]
3506; UNROLL-NO-VF-NEXT:    [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDUCTION1]]
3507; UNROLL-NO-VF-NEXT:    store i32 [[TMP12]], i32* [[TMP14]], align 4
3508; UNROLL-NO-VF-NEXT:    store i32 [[TMP13]], i32* [[TMP15]], align 4
3509; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
3510; UNROLL-NO-VF-NEXT:    [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3511; UNROLL-NO-VF-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP19:![0-9]+]]
3512; UNROLL-NO-VF:       middle.block:
3513; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3514; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3515; UNROLL-NO-VF:       scalar.ph:
3516; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
3517; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
3518; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY:%.*]]
3519; UNROLL-NO-VF:       for.body:
3520; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP17:%.*]], [[FOR_BODY]] ]
3521; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3522; UNROLL-NO-VF-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3523; UNROLL-NO-VF-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 2
3524; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3525; UNROLL-NO-VF-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
3526; UNROLL-NO-VF-NEXT:    [[TMP17]] = load i16, i16* [[ARRAYIDX2]], align 2
3527; UNROLL-NO-VF-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP17]] to i32
3528; UNROLL-NO-VF-NEXT:    [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
3529; UNROLL-NO-VF-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3530; UNROLL-NO-VF-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3531; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3532; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
3533; UNROLL-NO-VF:       for.end:
3534; UNROLL-NO-VF-NEXT:    ret void
3535;
3536; SINK-AFTER-LABEL: @sink_after_with_multiple_users(
3537; SINK-AFTER-NEXT:  entry:
3538; SINK-AFTER-NEXT:    [[DOTPRE:%.*]] = load i16, i16* [[A:%.*]], align 2
3539; SINK-AFTER-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
3540; SINK-AFTER-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3541; SINK-AFTER:       vector.ph:
3542; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i64 [[N]], 4
3543; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
3544; SINK-AFTER-NEXT:    [[VECTOR_RECUR_INIT:%.*]] = insertelement <4 x i16> poison, i16 [[DOTPRE]], i32 3
3545; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
3546; SINK-AFTER:       vector.body:
3547; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3548; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ [[VECTOR_RECUR_INIT]], [[VECTOR_PH]] ], [ [[WIDE_LOAD:%.*]], [[VECTOR_BODY]] ]
3549; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
3550; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add nuw nsw i64 [[TMP0]], 1
3551; SINK-AFTER-NEXT:    [[TMP2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[TMP1]]
3552; SINK-AFTER-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i16, i16* [[TMP2]], i32 0
3553; SINK-AFTER-NEXT:    [[TMP4:%.*]] = bitcast i16* [[TMP3]] to <4 x i16>*
3554; SINK-AFTER-NEXT:    [[WIDE_LOAD]] = load <4 x i16>, <4 x i16>* [[TMP4]], align 2
3555; SINK-AFTER-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[WIDE_LOAD]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3556; SINK-AFTER-NEXT:    [[TMP6:%.*]] = sext <4 x i16> [[TMP5]] to <4 x i32>
3557; SINK-AFTER-NEXT:    [[TMP7:%.*]] = add nsw <4 x i32> [[TMP6]], <i32 2, i32 2, i32 2, i32 2>
3558; SINK-AFTER-NEXT:    [[TMP8:%.*]] = sext <4 x i16> [[WIDE_LOAD]] to <4 x i32>
3559; SINK-AFTER-NEXT:    [[TMP9:%.*]] = mul nsw <4 x i32> [[TMP7]], [[TMP8]]
3560; SINK-AFTER-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[TMP0]]
3561; SINK-AFTER-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[TMP10]], i32 0
3562; SINK-AFTER-NEXT:    [[TMP12:%.*]] = bitcast i32* [[TMP11]] to <4 x i32>*
3563; SINK-AFTER-NEXT:    store <4 x i32> [[TMP9]], <4 x i32>* [[TMP12]], align 4
3564; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
3565; SINK-AFTER-NEXT:    [[TMP13:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
3566; SINK-AFTER-NEXT:    br i1 [[TMP13]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP20:![0-9]+]]
3567; SINK-AFTER:       middle.block:
3568; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
3569; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 3
3570; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[WIDE_LOAD]], i32 2
3571; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3572; SINK-AFTER:       scalar.ph:
3573; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ [[DOTPRE]], [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3574; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
3575; SINK-AFTER-NEXT:    br label [[FOR_BODY:%.*]]
3576; SINK-AFTER:       for.body:
3577; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[TMP14:%.*]], [[FOR_BODY]] ]
3578; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
3579; SINK-AFTER-NEXT:    [[CONV:%.*]] = sext i16 [[SCALAR_RECUR]] to i32
3580; SINK-AFTER-NEXT:    [[ADD:%.*]] = add nsw i32 [[CONV]], 2
3581; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
3582; SINK-AFTER-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds i16, i16* [[A]], i64 [[INDVARS_IV_NEXT]]
3583; SINK-AFTER-NEXT:    [[TMP14]] = load i16, i16* [[ARRAYIDX2]], align 2
3584; SINK-AFTER-NEXT:    [[CONV3:%.*]] = sext i16 [[TMP14]] to i32
3585; SINK-AFTER-NEXT:    [[MUL:%.*]] = mul nsw i32 [[ADD]], [[CONV3]]
3586; SINK-AFTER-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
3587; SINK-AFTER-NEXT:    store i32 [[MUL]], i32* [[ARRAYIDX5]], align 4
3588; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[INDVARS_IV_NEXT]], [[N]]
3589; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
3590; SINK-AFTER:       for.end:
3591; SINK-AFTER-NEXT:    ret void
3592;
3593entry:
3594  %.pre = load i16, i16* %a
3595  br label %for.body
3596
3597for.body:
3598  %0 = phi i16 [ %.pre, %entry ], [ %1, %for.body ]
3599  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
3600  %conv = sext i16 %0 to i32
3601  %add = add nsw i32 %conv, 2
3602  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
3603  %arrayidx2 = getelementptr inbounds i16, i16* %a, i64 %indvars.iv.next
3604  %1 = load i16, i16* %arrayidx2
3605  %conv3 = sext i16 %1 to i32
3606  %mul = mul nsw i32 %add, %conv3
3607  %arrayidx5 = getelementptr inbounds i32, i32* %b, i64 %indvars.iv
3608  store i32 %mul, i32* %arrayidx5
3609  %exitcond = icmp eq i64 %indvars.iv.next, %n
3610  br i1 %exitcond, label %for.end, label %for.body
3611
3612for.end:
3613  ret void
3614}
3615
3616; Do not sink branches: While branches are if-converted and do not require
3617; sinking, instructions with side effects (e.g. loads) conditioned by those
3618; branches will become users of the condition bit after vectorization and would
3619; need to be sunk if the loop is vectorized.
3620define void @do_not_sink_branch(i32 %x, i32* %in, i32* %out, i32 %tc) local_unnamed_addr #0 {
3621; CHECK-LABEL: @do_not_sink_branch(
3622; CHECK-NEXT:  entry:
3623; CHECK-NEXT:    [[CMP530:%.*]] = icmp sgt i32 [[TC:%.*]], 0
3624; CHECK-NEXT:    br label [[FOR_BODY4:%.*]]
3625; CHECK:       for.body4:
3626; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
3627; CHECK-NEXT:    [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
3628; CHECK-NEXT:    br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
3629; CHECK:       cond.true:
3630; CHECK-NEXT:    [[TMP0:%.*]] = zext i32 [[INDVARS_IV]] to i64
3631; CHECK-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP0]]
3632; CHECK-NEXT:    [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
3633; CHECK-NEXT:    br label [[COND_END]]
3634; CHECK:       cond.end:
3635; CHECK-NEXT:    [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
3636; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[INDVARS_IV]] to i64
3637; CHECK-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP1]]
3638; CHECK-NEXT:    store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
3639; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
3640; CHECK-NEXT:    [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
3641; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
3642; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
3643; CHECK:       for.end12.loopexit:
3644; CHECK-NEXT:    ret void
3645;
3646; UNROLL-LABEL: @do_not_sink_branch(
3647; UNROLL-NEXT:  entry:
3648; UNROLL-NEXT:    [[CMP530:%.*]] = icmp sgt i32 [[TC:%.*]], 0
3649; UNROLL-NEXT:    br label [[FOR_BODY4:%.*]]
3650; UNROLL:       for.body4:
3651; UNROLL-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
3652; UNROLL-NEXT:    [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
3653; UNROLL-NEXT:    br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
3654; UNROLL:       cond.true:
3655; UNROLL-NEXT:    [[TMP0:%.*]] = zext i32 [[INDVARS_IV]] to i64
3656; UNROLL-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[TMP0]]
3657; UNROLL-NEXT:    [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
3658; UNROLL-NEXT:    br label [[COND_END]]
3659; UNROLL:       cond.end:
3660; UNROLL-NEXT:    [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
3661; UNROLL-NEXT:    [[TMP1:%.*]] = zext i32 [[INDVARS_IV]] to i64
3662; UNROLL-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i64 [[TMP1]]
3663; UNROLL-NEXT:    store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
3664; UNROLL-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
3665; UNROLL-NEXT:    [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
3666; UNROLL-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
3667; UNROLL-NEXT:    br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
3668; UNROLL:       for.end12.loopexit:
3669; UNROLL-NEXT:    ret void
3670;
3671; UNROLL-NO-IC-LABEL: @do_not_sink_branch(
3672; UNROLL-NO-IC-NEXT:  entry:
3673; UNROLL-NO-IC-NEXT:    [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
3674; UNROLL-NO-IC-NEXT:    br label [[FOR_BODY4:%.*]]
3675; UNROLL-NO-IC:       for.body4:
3676; UNROLL-NO-IC-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
3677; UNROLL-NO-IC-NEXT:    [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
3678; UNROLL-NO-IC-NEXT:    br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
3679; UNROLL-NO-IC:       cond.true:
3680; UNROLL-NO-IC-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i32 [[INDVARS_IV]]
3681; UNROLL-NO-IC-NEXT:    [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
3682; UNROLL-NO-IC-NEXT:    br label [[COND_END]]
3683; UNROLL-NO-IC:       cond.end:
3684; UNROLL-NO-IC-NEXT:    [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
3685; UNROLL-NO-IC-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i32 [[INDVARS_IV]]
3686; UNROLL-NO-IC-NEXT:    store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
3687; UNROLL-NO-IC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
3688; UNROLL-NO-IC-NEXT:    [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
3689; UNROLL-NO-IC-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
3690; UNROLL-NO-IC-NEXT:    br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
3691; UNROLL-NO-IC:       for.end12.loopexit:
3692; UNROLL-NO-IC-NEXT:    ret void
3693;
3694; UNROLL-NO-VF-LABEL: @do_not_sink_branch(
3695; UNROLL-NO-VF-NEXT:  entry:
3696; UNROLL-NO-VF-NEXT:    [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
3697; UNROLL-NO-VF-NEXT:    br label [[FOR_BODY4:%.*]]
3698; UNROLL-NO-VF:       for.body4:
3699; UNROLL-NO-VF-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
3700; UNROLL-NO-VF-NEXT:    [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
3701; UNROLL-NO-VF-NEXT:    br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
3702; UNROLL-NO-VF:       cond.true:
3703; UNROLL-NO-VF-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i32 [[INDVARS_IV]]
3704; UNROLL-NO-VF-NEXT:    [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
3705; UNROLL-NO-VF-NEXT:    br label [[COND_END]]
3706; UNROLL-NO-VF:       cond.end:
3707; UNROLL-NO-VF-NEXT:    [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
3708; UNROLL-NO-VF-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i32 [[INDVARS_IV]]
3709; UNROLL-NO-VF-NEXT:    store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
3710; UNROLL-NO-VF-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
3711; UNROLL-NO-VF-NEXT:    [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
3712; UNROLL-NO-VF-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
3713; UNROLL-NO-VF-NEXT:    br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
3714; UNROLL-NO-VF:       for.end12.loopexit:
3715; UNROLL-NO-VF-NEXT:    ret void
3716;
3717; SINK-AFTER-LABEL: @do_not_sink_branch(
3718; SINK-AFTER-NEXT:  entry:
3719; SINK-AFTER-NEXT:    [[CMP530:%.*]] = icmp slt i32 0, [[TC:%.*]]
3720; SINK-AFTER-NEXT:    br label [[FOR_BODY4:%.*]]
3721; SINK-AFTER:       for.body4:
3722; SINK-AFTER-NEXT:    [[INDVARS_IV:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INDVARS_IV_NEXT:%.*]], [[COND_END:%.*]] ]
3723; SINK-AFTER-NEXT:    [[CMP534:%.*]] = phi i1 [ [[CMP530]], [[ENTRY]] ], [ [[CMP5:%.*]], [[COND_END]] ]
3724; SINK-AFTER-NEXT:    br i1 [[CMP534]], label [[COND_TRUE:%.*]], label [[COND_END]]
3725; SINK-AFTER:       cond.true:
3726; SINK-AFTER-NEXT:    [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i32 [[INDVARS_IV]]
3727; SINK-AFTER-NEXT:    [[IN_VAL:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4
3728; SINK-AFTER-NEXT:    br label [[COND_END]]
3729; SINK-AFTER:       cond.end:
3730; SINK-AFTER-NEXT:    [[COND:%.*]] = phi i32 [ [[IN_VAL]], [[COND_TRUE]] ], [ 0, [[FOR_BODY4]] ]
3731; SINK-AFTER-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[OUT:%.*]], i32 [[INDVARS_IV]]
3732; SINK-AFTER-NEXT:    store i32 [[COND]], i32* [[ARRAYIDX8]], align 4
3733; SINK-AFTER-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i32 [[INDVARS_IV]], 1
3734; SINK-AFTER-NEXT:    [[CMP5]] = icmp slt i32 [[INDVARS_IV_NEXT]], [[TC]]
3735; SINK-AFTER-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[INDVARS_IV_NEXT]], [[X:%.*]]
3736; SINK-AFTER-NEXT:    br i1 [[EXITCOND]], label [[FOR_END12_LOOPEXIT:%.*]], label [[FOR_BODY4]]
3737; SINK-AFTER:       for.end12.loopexit:
3738; SINK-AFTER-NEXT:    ret void
3739;
3740entry:
3741  %cmp530 = icmp slt i32 0, %tc
3742  br label %for.body4
3743
3744for.body4:                                        ; preds = %cond.end, %entry
3745  %indvars.iv = phi i32 [ 0, %entry ], [ %indvars.iv.next, %cond.end ]
3746  %cmp534 = phi i1 [ %cmp530, %entry ], [ %cmp5, %cond.end ]
3747  br i1 %cmp534, label %cond.true, label %cond.end
3748
3749cond.true:                                        ; preds = %for.body4
3750  %arrayidx7 = getelementptr inbounds i32, i32* %in, i32 %indvars.iv
3751  %in.val = load i32, i32* %arrayidx7, align 4
3752  br label %cond.end
3753
3754cond.end:                                         ; preds = %for.body4, %cond.true
3755  %cond = phi i32 [ %in.val, %cond.true ], [ 0, %for.body4 ]
3756  %arrayidx8 = getelementptr inbounds i32, i32* %out, i32 %indvars.iv
3757  store i32 %cond, i32* %arrayidx8, align 4
3758  %indvars.iv.next = add nuw nsw i32 %indvars.iv, 1
3759  %cmp5 = icmp slt i32 %indvars.iv.next, %tc
3760  %exitcond = icmp eq i32 %indvars.iv.next, %x
3761  br i1 %exitcond, label %for.end12.loopexit, label %for.body4
3762
3763for.end12.loopexit:                               ; preds = %cond.end
3764  ret void
3765}
3766
3767; Dead instructions, like the exit condition are not part of the actual VPlan
3768; and do not need to be sunk. PR44634.
3769define void @sink_dead_inst() {
3770;
3771; CHECK-LABEL: @sink_dead_inst(
3772; CHECK-NEXT:  entry:
3773; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3774; CHECK:       vector.ph:
3775; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
3776; CHECK:       vector.body:
3777; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3778; CHECK-NEXT:    [[TMP0:%.*]] = phi i16 [ -24, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
3779; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3780; CHECK-NEXT:    [[TMP1]] = add i16 [[TMP0]], 4
3781; CHECK-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
3782; CHECK-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
3783; CHECK:       middle.block:
3784; CHECK-NEXT:    [[TMP3:%.*]] = or i16 [[TMP0]], 1
3785; CHECK-NEXT:    [[VECTOR_RECUR_EXTRACT2:%.*]] = zext i16 [[TMP3]] to i32
3786; CHECK-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
3787; CHECK:       scalar.ph:
3788; CHECK-NEXT:    [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
3789; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ -27, [[ENTRY]] ], [ 13, [[MIDDLE_BLOCK]] ]
3790; CHECK-NEXT:    br label [[FOR_COND:%.*]]
3791; CHECK:       for.cond:
3792; CHECK-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
3793; CHECK-NEXT:    [[SCALAR_RECUR5:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
3794; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR5]], 15
3795; CHECK-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
3796; CHECK-NEXT:    [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
3797; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
3798; CHECK:       for.end:
3799; CHECK-NEXT:    ret void
3800;
3801; UNROLL-LABEL: @sink_dead_inst(
3802; UNROLL-NEXT:  entry:
3803; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3804; UNROLL:       vector.ph:
3805; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
3806; UNROLL:       vector.body:
3807; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3808; UNROLL-NEXT:    [[TMP0:%.*]] = phi i16 [ -24, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
3809; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
3810; UNROLL-NEXT:    [[TMP1]] = add i16 [[TMP0]], 8
3811; UNROLL-NEXT:    [[TMP2:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
3812; UNROLL-NEXT:    br i1 [[TMP2]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
3813; UNROLL:       middle.block:
3814; UNROLL-NEXT:    [[TMP3:%.*]] = or i16 [[TMP0]], 5
3815; UNROLL-NEXT:    [[VECTOR_RECUR_EXTRACT3:%.*]] = zext i16 [[TMP3]] to i32
3816; UNROLL-NEXT:    br i1 false, label [[FOR_END:%.*]], label [[SCALAR_PH]]
3817; UNROLL:       scalar.ph:
3818; UNROLL-NEXT:    [[SCALAR_RECUR_INIT5:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT3]], [[MIDDLE_BLOCK]] ]
3819; UNROLL-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ -27, [[ENTRY]] ], [ 13, [[MIDDLE_BLOCK]] ]
3820; UNROLL-NEXT:    br label [[FOR_COND:%.*]]
3821; UNROLL:       for.cond:
3822; UNROLL-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
3823; UNROLL-NEXT:    [[SCALAR_RECUR6:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT5]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
3824; UNROLL-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR6]], 15
3825; UNROLL-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
3826; UNROLL-NEXT:    [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
3827; UNROLL-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
3828; UNROLL:       for.end:
3829; UNROLL-NEXT:    ret void
3830;
3831; UNROLL-NO-IC-LABEL: @sink_dead_inst(
3832; UNROLL-NO-IC-NEXT:  entry:
3833; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3834; UNROLL-NO-IC:       vector.ph:
3835; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
3836; UNROLL-NO-IC:       vector.body:
3837; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3838; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3839; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
3840; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR2:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
3841; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
3842; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
3843; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[STEP_ADD]], <i16 1, i16 1, i16 1, i16 1>
3844; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = zext <4 x i16> [[TMP0]] to <4 x i32>
3845; UNROLL-NO-IC-NEXT:    [[TMP3]] = zext <4 x i16> [[TMP1]] to <4 x i32>
3846; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR2]], <4 x i32> [[TMP2]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3847; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = shufflevector <4 x i32> [[TMP2]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3848; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add <4 x i16> [[TMP0]], <i16 5, i16 5, i16 5, i16 5>
3849; UNROLL-NO-IC-NEXT:    [[TMP7]] = add <4 x i16> [[TMP1]], <i16 5, i16 5, i16 5, i16 5>
3850; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP6]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3851; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i16> [[TMP6]], <4 x i16> [[TMP7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3852; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
3853; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
3854; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
3855; UNROLL-NO-IC-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
3856; UNROLL-NO-IC:       middle.block:
3857; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 43, 40
3858; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP7]], i32 3
3859; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP7]], i32 2
3860; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT3:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
3861; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI4:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
3862; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3863; UNROLL-NO-IC:       scalar.ph:
3864; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT5:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT3]], [[MIDDLE_BLOCK]] ]
3865; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3866; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
3867; UNROLL-NO-IC-NEXT:    br label [[FOR_COND:%.*]]
3868; UNROLL-NO-IC:       for.cond:
3869; UNROLL-NO-IC-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
3870; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
3871; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR6:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT5]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
3872; UNROLL-NO-IC-NEXT:    [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10
3873; UNROLL-NO-IC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR6]], 15
3874; UNROLL-NO-IC-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
3875; UNROLL-NO-IC-NEXT:    [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
3876; UNROLL-NO-IC-NEXT:    [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
3877; UNROLL-NO-IC-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
3878; UNROLL-NO-IC:       for.end:
3879; UNROLL-NO-IC-NEXT:    ret void
3880;
3881; UNROLL-NO-VF-LABEL: @sink_dead_inst(
3882; UNROLL-NO-VF-NEXT:  entry:
3883; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3884; UNROLL-NO-VF:       vector.ph:
3885; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
3886; UNROLL-NO-VF:       vector.body:
3887; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3888; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i16 [ 0, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
3889; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR1:%.*]] = phi i32 [ -27, [[VECTOR_PH]] ], [ [[TMP4:%.*]], [[VECTOR_BODY]] ]
3890; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = trunc i32 [[INDEX]] to i16
3891; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = add i16 -27, [[TMP0]]
3892; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i16 [[OFFSET_IDX]], 0
3893; UNROLL-NO-VF-NEXT:    [[INDUCTION2:%.*]] = add i16 [[OFFSET_IDX]], 1
3894; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i16 [[INDUCTION]], 1
3895; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = add i16 [[INDUCTION2]], 1
3896; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = zext i16 [[TMP1]] to i32
3897; UNROLL-NO-VF-NEXT:    [[TMP4]] = zext i16 [[TMP2]] to i32
3898; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = add i16 [[TMP1]], 5
3899; UNROLL-NO-VF-NEXT:    [[TMP6]] = add i16 [[TMP2]], 5
3900; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
3901; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = icmp eq i32 [[INDEX_NEXT]], 42
3902; UNROLL-NO-VF-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP21:![0-9]+]]
3903; UNROLL-NO-VF:       middle.block:
3904; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 43, 42
3905; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3906; UNROLL-NO-VF:       scalar.ph:
3907; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT3:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[TMP4]], [[MIDDLE_BLOCK]] ]
3908; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[TMP6]], [[MIDDLE_BLOCK]] ]
3909; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 15, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
3910; UNROLL-NO-VF-NEXT:    br label [[FOR_COND:%.*]]
3911; UNROLL-NO-VF:       for.cond:
3912; UNROLL-NO-VF-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
3913; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
3914; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR4:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT3]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
3915; UNROLL-NO-VF-NEXT:    [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10
3916; UNROLL-NO-VF-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR4]], 15
3917; UNROLL-NO-VF-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
3918; UNROLL-NO-VF-NEXT:    [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
3919; UNROLL-NO-VF-NEXT:    [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
3920; UNROLL-NO-VF-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]]
3921; UNROLL-NO-VF:       for.end:
3922; UNROLL-NO-VF-NEXT:    ret void
3923;
3924; SINK-AFTER-LABEL: @sink_dead_inst(
3925; SINK-AFTER-NEXT:  entry:
3926; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3927; SINK-AFTER:       vector.ph:
3928; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
3929; SINK-AFTER:       vector.body:
3930; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
3931; SINK-AFTER-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 -27, i16 -26, i16 -25, i16 -24>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
3932; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i16> [ <i16 poison, i16 poison, i16 poison, i16 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
3933; SINK-AFTER-NEXT:    [[VECTOR_RECUR1:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 -27>, [[VECTOR_PH]] ], [ [[TMP1:%.*]], [[VECTOR_BODY]] ]
3934; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
3935; SINK-AFTER-NEXT:    [[TMP1]] = zext <4 x i16> [[TMP0]] to <4 x i32>
3936; SINK-AFTER-NEXT:    [[TMP2:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR1]], <4 x i32> [[TMP1]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3937; SINK-AFTER-NEXT:    [[TMP3]] = add <4 x i16> [[TMP0]], <i16 5, i16 5, i16 5, i16 5>
3938; SINK-AFTER-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i16> [[VECTOR_RECUR]], <4 x i16> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
3939; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
3940; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
3941; SINK-AFTER-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 40
3942; SINK-AFTER-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP22:![0-9]+]]
3943; SINK-AFTER:       middle.block:
3944; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i32 43, 40
3945; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i16> [[TMP3]], i32 3
3946; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i16> [[TMP3]], i32 2
3947; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT2:%.*]] = extractelement <4 x i32> [[TMP1]], i32 3
3948; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI3:%.*]] = extractelement <4 x i32> [[TMP1]], i32 2
3949; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
3950; SINK-AFTER:       scalar.ph:
3951; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT4:%.*]] = phi i32 [ -27, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT2]], [[MIDDLE_BLOCK]] ]
3952; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i16 [ 0, [[ENTRY]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
3953; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 13, [[MIDDLE_BLOCK]] ], [ -27, [[ENTRY]] ]
3954; SINK-AFTER-NEXT:    br label [[FOR_COND:%.*]]
3955; SINK-AFTER:       for.cond:
3956; SINK-AFTER-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_COND]] ]
3957; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i16 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[REC_1_PREV:%.*]], [[FOR_COND]] ]
3958; SINK-AFTER-NEXT:    [[SCALAR_RECUR5:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT4]], [[SCALAR_PH]] ], [ [[REC_2_PREV:%.*]], [[FOR_COND]] ]
3959; SINK-AFTER-NEXT:    [[USE_REC_1:%.*]] = sub i16 [[SCALAR_RECUR]], 10
3960; SINK-AFTER-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR5]], 15
3961; SINK-AFTER-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
3962; SINK-AFTER-NEXT:    [[REC_2_PREV]] = zext i16 [[IV_NEXT]] to i32
3963; SINK-AFTER-NEXT:    [[REC_1_PREV]] = add i16 [[IV_NEXT]], 5
3964; SINK-AFTER-NEXT:    br i1 [[CMP]], label [[FOR_END]], label [[FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]]
3965; SINK-AFTER:       for.end:
3966; SINK-AFTER-NEXT:    ret void
3967;
3968entry:
3969  br label %for.cond
3970
3971for.cond:
3972  %iv = phi i16 [ -27, %entry ], [ %iv.next, %for.cond ]
3973  %rec.1 = phi i16 [ 0, %entry ], [ %rec.1.prev, %for.cond ]
3974  %rec.2 = phi i32 [ -27, %entry ], [ %rec.2.prev, %for.cond ]
3975  %use.rec.1 = sub i16 %rec.1, 10
3976  %cmp = icmp eq i32 %rec.2, 15
3977  %iv.next = add i16 %iv, 1
3978  %rec.2.prev = zext i16 %iv.next to i32
3979  %rec.1.prev = add i16 %iv.next, 5
3980  br i1 %cmp, label %for.end, label %for.cond
3981
3982for.end:
3983  ret void
3984}
3985
3986define i32 @sink_into_replication_region(i32 %y) {
3987;
3988; CHECK-LABEL: @sink_into_replication_region(
3989; CHECK-NEXT:  bb:
3990; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
3991; CHECK:       vector.ph:
3992; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.smax.i32(i32 [[Y:%.*]], i32 1)
3993; CHECK-NEXT:    [[N_RND_UP:%.*]] = add nuw i32 [[TMP0]], 3
3994; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4
3995; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[TMP0]], -1
3996; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
3997; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
3998; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
3999; CHECK:       vector.body:
4000; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ]
4001; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[PRED_UDIV_CONTINUE8]] ]
4002; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_UDIV_CONTINUE8]] ]
4003; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4004; CHECK-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
4005; CHECK-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
4006; CHECK-NEXT:    [[VEC_IV:%.*]] = or <4 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3>
4007; CHECK-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
4008; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
4009; CHECK-NEXT:    br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4010; CHECK:       pred.udiv.if:
4011; CHECK-NEXT:    [[TMP3:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
4012; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x i32> poison, i32 [[TMP3]], i64 0
4013; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4014; CHECK:       pred.udiv.continue:
4015; CHECK-NEXT:    [[TMP5:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ]
4016; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1
4017; CHECK-NEXT:    br i1 [[TMP6]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
4018; CHECK:       pred.udiv.if3:
4019; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -1
4020; CHECK-NEXT:    [[TMP8:%.*]] = udiv i32 219220132, [[TMP7]]
4021; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> [[TMP5]], i32 [[TMP8]], i64 1
4022; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE4]]
4023; CHECK:       pred.udiv.continue4:
4024; CHECK-NEXT:    [[TMP10:%.*]] = phi <4 x i32> [ [[TMP5]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP9]], [[PRED_UDIV_IF3]] ]
4025; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2
4026; CHECK-NEXT:    br i1 [[TMP11]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
4027; CHECK:       pred.udiv.if5:
4028; CHECK-NEXT:    [[TMP12:%.*]] = add i32 [[OFFSET_IDX]], -2
4029; CHECK-NEXT:    [[TMP13:%.*]] = udiv i32 219220132, [[TMP12]]
4030; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP13]], i64 2
4031; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE6]]
4032; CHECK:       pred.udiv.continue6:
4033; CHECK-NEXT:    [[TMP15:%.*]] = phi <4 x i32> [ [[TMP10]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP14]], [[PRED_UDIV_IF5]] ]
4034; CHECK-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3
4035; CHECK-NEXT:    br i1 [[TMP16]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8]]
4036; CHECK:       pred.udiv.if7:
4037; CHECK-NEXT:    [[TMP17:%.*]] = add i32 [[OFFSET_IDX]], -3
4038; CHECK-NEXT:    [[TMP18:%.*]] = udiv i32 219220132, [[TMP17]]
4039; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP18]], i64 3
4040; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE8]]
4041; CHECK:       pred.udiv.continue8:
4042; CHECK-NEXT:    [[TMP20]] = phi <4 x i32> [ [[TMP15]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP19]], [[PRED_UDIV_IF7]] ]
4043; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP20]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4044; CHECK-NEXT:    [[TMP22]] = add <4 x i32> [[VEC_PHI]], [[TMP21]]
4045; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
4046; CHECK-NEXT:    [[TMP23:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4047; CHECK-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24:![0-9]+]], !llvm.loop [[LOOP25:![0-9]+]]
4048; CHECK:       middle.block:
4049; CHECK-NEXT:    [[TMP24:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP22]], <4 x i32> [[VEC_PHI]]
4050; CHECK-NEXT:    [[TMP25:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP24]])
4051; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4052; CHECK:       scalar.ph:
4053; CHECK-NEXT:    br label [[BB2:%.*]]
4054; CHECK:       bb1:
4055; CHECK-NEXT:    [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP25]], [[MIDDLE_BLOCK]] ]
4056; CHECK-NEXT:    ret i32 [[VAR]]
4057; CHECK:       bb2:
4058; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF26:![0-9]+]], !llvm.loop [[LOOP27:![0-9]+]]
4059;
4060; UNROLL-LABEL: @sink_into_replication_region(
4061; UNROLL-NEXT:  bb:
4062; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4063; UNROLL:       vector.ph:
4064; UNROLL-NEXT:    [[TMP0:%.*]] = call i32 @llvm.smax.i32(i32 [[Y:%.*]], i32 1)
4065; UNROLL-NEXT:    [[N_RND_UP:%.*]] = add nuw i32 [[TMP0]], 7
4066; UNROLL-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
4067; UNROLL-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[TMP0]], -1
4068; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
4069; UNROLL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4070; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
4071; UNROLL:       vector.body:
4072; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE18:%.*]] ]
4073; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[PRED_UDIV_CONTINUE18]] ]
4074; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP44:%.*]], [[PRED_UDIV_CONTINUE18]] ]
4075; UNROLL-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_UDIV_CONTINUE18]] ]
4076; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4077; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i64 0
4078; UNROLL-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
4079; UNROLL-NEXT:    [[VEC_IV:%.*]] = or <4 x i32> [[BROADCAST_SPLAT3]], <i32 0, i32 1, i32 2, i32 3>
4080; UNROLL-NEXT:    [[VEC_IV4:%.*]] = or <4 x i32> [[BROADCAST_SPLAT3]], <i32 4, i32 5, i32 6, i32 7>
4081; UNROLL-NEXT:    [[TMP1:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
4082; UNROLL-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV4]], [[BROADCAST_SPLAT]]
4083; UNROLL-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP1]], i64 0
4084; UNROLL-NEXT:    br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4085; UNROLL:       pred.udiv.if:
4086; UNROLL-NEXT:    [[TMP4:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
4087; UNROLL-NEXT:    [[TMP5:%.*]] = insertelement <4 x i32> poison, i32 [[TMP4]], i64 0
4088; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4089; UNROLL:       pred.udiv.continue:
4090; UNROLL-NEXT:    [[TMP6:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP5]], [[PRED_UDIV_IF]] ]
4091; UNROLL-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP1]], i64 1
4092; UNROLL-NEXT:    br i1 [[TMP7]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
4093; UNROLL:       pred.udiv.if5:
4094; UNROLL-NEXT:    [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -1
4095; UNROLL-NEXT:    [[TMP9:%.*]] = udiv i32 219220132, [[TMP8]]
4096; UNROLL-NEXT:    [[TMP10:%.*]] = insertelement <4 x i32> [[TMP6]], i32 [[TMP9]], i64 1
4097; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE6]]
4098; UNROLL:       pred.udiv.continue6:
4099; UNROLL-NEXT:    [[TMP11:%.*]] = phi <4 x i32> [ [[TMP6]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP10]], [[PRED_UDIV_IF5]] ]
4100; UNROLL-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP1]], i64 2
4101; UNROLL-NEXT:    br i1 [[TMP12]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
4102; UNROLL:       pred.udiv.if7:
4103; UNROLL-NEXT:    [[TMP13:%.*]] = add i32 [[OFFSET_IDX]], -2
4104; UNROLL-NEXT:    [[TMP14:%.*]] = udiv i32 219220132, [[TMP13]]
4105; UNROLL-NEXT:    [[TMP15:%.*]] = insertelement <4 x i32> [[TMP11]], i32 [[TMP14]], i64 2
4106; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE8]]
4107; UNROLL:       pred.udiv.continue8:
4108; UNROLL-NEXT:    [[TMP16:%.*]] = phi <4 x i32> [ [[TMP11]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP15]], [[PRED_UDIV_IF7]] ]
4109; UNROLL-NEXT:    [[TMP17:%.*]] = extractelement <4 x i1> [[TMP1]], i64 3
4110; UNROLL-NEXT:    br i1 [[TMP17]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]]
4111; UNROLL:       pred.udiv.if9:
4112; UNROLL-NEXT:    [[TMP18:%.*]] = add i32 [[OFFSET_IDX]], -3
4113; UNROLL-NEXT:    [[TMP19:%.*]] = udiv i32 219220132, [[TMP18]]
4114; UNROLL-NEXT:    [[TMP20:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP19]], i64 3
4115; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE10]]
4116; UNROLL:       pred.udiv.continue10:
4117; UNROLL-NEXT:    [[TMP21:%.*]] = phi <4 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP20]], [[PRED_UDIV_IF9]] ]
4118; UNROLL-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0
4119; UNROLL-NEXT:    br i1 [[TMP22]], label [[PRED_UDIV_IF11:%.*]], label [[PRED_UDIV_CONTINUE12:%.*]]
4120; UNROLL:       pred.udiv.if11:
4121; UNROLL-NEXT:    [[TMP23:%.*]] = add i32 [[OFFSET_IDX]], -4
4122; UNROLL-NEXT:    [[TMP24:%.*]] = udiv i32 219220132, [[TMP23]]
4123; UNROLL-NEXT:    [[TMP25:%.*]] = insertelement <4 x i32> poison, i32 [[TMP24]], i64 0
4124; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE12]]
4125; UNROLL:       pred.udiv.continue12:
4126; UNROLL-NEXT:    [[TMP26:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE10]] ], [ [[TMP25]], [[PRED_UDIV_IF11]] ]
4127; UNROLL-NEXT:    [[TMP27:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1
4128; UNROLL-NEXT:    br i1 [[TMP27]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14:%.*]]
4129; UNROLL:       pred.udiv.if13:
4130; UNROLL-NEXT:    [[TMP28:%.*]] = add i32 [[OFFSET_IDX]], -5
4131; UNROLL-NEXT:    [[TMP29:%.*]] = udiv i32 219220132, [[TMP28]]
4132; UNROLL-NEXT:    [[TMP30:%.*]] = insertelement <4 x i32> [[TMP26]], i32 [[TMP29]], i64 1
4133; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE14]]
4134; UNROLL:       pred.udiv.continue14:
4135; UNROLL-NEXT:    [[TMP31:%.*]] = phi <4 x i32> [ [[TMP26]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP30]], [[PRED_UDIV_IF13]] ]
4136; UNROLL-NEXT:    [[TMP32:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2
4137; UNROLL-NEXT:    br i1 [[TMP32]], label [[PRED_UDIV_IF15:%.*]], label [[PRED_UDIV_CONTINUE16:%.*]]
4138; UNROLL:       pred.udiv.if15:
4139; UNROLL-NEXT:    [[TMP33:%.*]] = add i32 [[OFFSET_IDX]], -6
4140; UNROLL-NEXT:    [[TMP34:%.*]] = udiv i32 219220132, [[TMP33]]
4141; UNROLL-NEXT:    [[TMP35:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP34]], i64 2
4142; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE16]]
4143; UNROLL:       pred.udiv.continue16:
4144; UNROLL-NEXT:    [[TMP36:%.*]] = phi <4 x i32> [ [[TMP31]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP35]], [[PRED_UDIV_IF15]] ]
4145; UNROLL-NEXT:    [[TMP37:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3
4146; UNROLL-NEXT:    br i1 [[TMP37]], label [[PRED_UDIV_IF17:%.*]], label [[PRED_UDIV_CONTINUE18]]
4147; UNROLL:       pred.udiv.if17:
4148; UNROLL-NEXT:    [[TMP38:%.*]] = add i32 [[OFFSET_IDX]], -7
4149; UNROLL-NEXT:    [[TMP39:%.*]] = udiv i32 219220132, [[TMP38]]
4150; UNROLL-NEXT:    [[TMP40:%.*]] = insertelement <4 x i32> [[TMP36]], i32 [[TMP39]], i64 3
4151; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE18]]
4152; UNROLL:       pred.udiv.continue18:
4153; UNROLL-NEXT:    [[TMP41]] = phi <4 x i32> [ [[TMP36]], [[PRED_UDIV_CONTINUE16]] ], [ [[TMP40]], [[PRED_UDIV_IF17]] ]
4154; UNROLL-NEXT:    [[TMP42:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP21]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4155; UNROLL-NEXT:    [[TMP43:%.*]] = shufflevector <4 x i32> [[TMP21]], <4 x i32> [[TMP41]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4156; UNROLL-NEXT:    [[TMP44]] = add <4 x i32> [[VEC_PHI]], [[TMP42]]
4157; UNROLL-NEXT:    [[TMP45]] = add <4 x i32> [[VEC_PHI1]], [[TMP43]]
4158; UNROLL-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
4159; UNROLL-NEXT:    [[TMP46:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4160; UNROLL-NEXT:    br i1 [[TMP46]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24:![0-9]+]], !llvm.loop [[LOOP25:![0-9]+]]
4161; UNROLL:       middle.block:
4162; UNROLL-NEXT:    [[TMP47:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI1]]
4163; UNROLL-NEXT:    [[TMP48:%.*]] = select <4 x i1> [[TMP1]], <4 x i32> [[TMP44]], <4 x i32> [[VEC_PHI]]
4164; UNROLL-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP47]], [[TMP48]]
4165; UNROLL-NEXT:    [[TMP49:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
4166; UNROLL-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4167; UNROLL:       scalar.ph:
4168; UNROLL-NEXT:    br label [[BB2:%.*]]
4169; UNROLL:       bb1:
4170; UNROLL-NEXT:    [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP49]], [[MIDDLE_BLOCK]] ]
4171; UNROLL-NEXT:    ret i32 [[VAR]]
4172; UNROLL:       bb2:
4173; UNROLL-NEXT:    br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF26:![0-9]+]], !llvm.loop [[LOOP27:![0-9]+]]
4174;
4175; UNROLL-NO-IC-LABEL: @sink_into_replication_region(
4176; UNROLL-NO-IC-NEXT:  bb:
4177; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
4178; UNROLL-NO-IC-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
4179; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
4180; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4181; UNROLL-NO-IC:       vector.ph:
4182; UNROLL-NO-IC-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
4183; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8
4184; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
4185; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
4186; UNROLL-NO-IC-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
4187; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
4188; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4189; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
4190; UNROLL-NO-IC:       vector.body:
4191; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE18:%.*]] ]
4192; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_UDIV_CONTINUE18]] ]
4193; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_UDIV_CONTINUE18]] ]
4194; UNROLL-NO-IC-NEXT:    [[VEC_PHI1:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_UDIV_CONTINUE18]] ]
4195; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4196; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT2:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
4197; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT3:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT2]], <4 x i32> poison, <4 x i32> zeroinitializer
4198; UNROLL-NO-IC-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT3]], <i32 0, i32 1, i32 2, i32 3>
4199; UNROLL-NO-IC-NEXT:    [[VEC_IV4:%.*]] = add <4 x i32> [[BROADCAST_SPLAT3]], <i32 4, i32 5, i32 6, i32 7>
4200; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
4201; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = icmp ule <4 x i32> [[VEC_IV4]], [[BROADCAST_SPLAT]]
4202; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
4203; UNROLL-NO-IC-NEXT:    br i1 [[TMP4]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4204; UNROLL-NO-IC:       pred.udiv.if:
4205; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], 0
4206; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = udiv i32 219220132, [[TMP5]]
4207; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i32 0
4208; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4209; UNROLL-NO-IC:       pred.udiv.continue:
4210; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UDIV_IF]] ]
4211; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
4212; UNROLL-NO-IC-NEXT:    br i1 [[TMP9]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
4213; UNROLL-NO-IC:       pred.udiv.if5:
4214; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = add i32 [[OFFSET_IDX]], -1
4215; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = udiv i32 219220132, [[TMP10]]
4216; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP11]], i32 1
4217; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE6]]
4218; UNROLL-NO-IC:       pred.udiv.continue6:
4219; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP12]], [[PRED_UDIV_IF5]] ]
4220; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
4221; UNROLL-NO-IC-NEXT:    br i1 [[TMP14]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
4222; UNROLL-NO-IC:       pred.udiv.if7:
4223; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = add i32 [[OFFSET_IDX]], -2
4224; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = udiv i32 219220132, [[TMP15]]
4225; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP16]], i32 2
4226; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE8]]
4227; UNROLL-NO-IC:       pred.udiv.continue8:
4228; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP17]], [[PRED_UDIV_IF7]] ]
4229; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
4230; UNROLL-NO-IC-NEXT:    br i1 [[TMP19]], label [[PRED_UDIV_IF9:%.*]], label [[PRED_UDIV_CONTINUE10:%.*]]
4231; UNROLL-NO-IC:       pred.udiv.if9:
4232; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = add i32 [[OFFSET_IDX]], -3
4233; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = udiv i32 219220132, [[TMP20]]
4234; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP21]], i32 3
4235; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE10]]
4236; UNROLL-NO-IC:       pred.udiv.continue10:
4237; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE8]] ], [ [[TMP22]], [[PRED_UDIV_IF9]] ]
4238; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP3]], i32 0
4239; UNROLL-NO-IC-NEXT:    br i1 [[TMP24]], label [[PRED_UDIV_IF11:%.*]], label [[PRED_UDIV_CONTINUE12:%.*]]
4240; UNROLL-NO-IC:       pred.udiv.if11:
4241; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = add i32 [[OFFSET_IDX]], -4
4242; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = udiv i32 219220132, [[TMP25]]
4243; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = insertelement <4 x i32> poison, i32 [[TMP26]], i32 0
4244; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE12]]
4245; UNROLL-NO-IC:       pred.udiv.continue12:
4246; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE10]] ], [ [[TMP27]], [[PRED_UDIV_IF11]] ]
4247; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP3]], i32 1
4248; UNROLL-NO-IC-NEXT:    br i1 [[TMP29]], label [[PRED_UDIV_IF13:%.*]], label [[PRED_UDIV_CONTINUE14:%.*]]
4249; UNROLL-NO-IC:       pred.udiv.if13:
4250; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = add i32 [[OFFSET_IDX]], -5
4251; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = udiv i32 219220132, [[TMP30]]
4252; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP28]], i32 [[TMP31]], i32 1
4253; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE14]]
4254; UNROLL-NO-IC:       pred.udiv.continue14:
4255; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = phi <4 x i32> [ [[TMP28]], [[PRED_UDIV_CONTINUE12]] ], [ [[TMP32]], [[PRED_UDIV_IF13]] ]
4256; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP3]], i32 2
4257; UNROLL-NO-IC-NEXT:    br i1 [[TMP34]], label [[PRED_UDIV_IF15:%.*]], label [[PRED_UDIV_CONTINUE16:%.*]]
4258; UNROLL-NO-IC:       pred.udiv.if15:
4259; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = add i32 [[OFFSET_IDX]], -6
4260; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = udiv i32 219220132, [[TMP35]]
4261; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i32 2
4262; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE16]]
4263; UNROLL-NO-IC:       pred.udiv.continue16:
4264; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_UDIV_CONTINUE14]] ], [ [[TMP37]], [[PRED_UDIV_IF15]] ]
4265; UNROLL-NO-IC-NEXT:    [[TMP39:%.*]] = extractelement <4 x i1> [[TMP3]], i32 3
4266; UNROLL-NO-IC-NEXT:    br i1 [[TMP39]], label [[PRED_UDIV_IF17:%.*]], label [[PRED_UDIV_CONTINUE18]]
4267; UNROLL-NO-IC:       pred.udiv.if17:
4268; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = add i32 [[OFFSET_IDX]], -7
4269; UNROLL-NO-IC-NEXT:    [[TMP41:%.*]] = udiv i32 219220132, [[TMP40]]
4270; UNROLL-NO-IC-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i32 3
4271; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE18]]
4272; UNROLL-NO-IC:       pred.udiv.continue18:
4273; UNROLL-NO-IC-NEXT:    [[TMP43]] = phi <4 x i32> [ [[TMP38]], [[PRED_UDIV_CONTINUE16]] ], [ [[TMP42]], [[PRED_UDIV_IF17]] ]
4274; UNROLL-NO-IC-NEXT:    [[TMP44:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP23]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4275; UNROLL-NO-IC-NEXT:    [[TMP45:%.*]] = shufflevector <4 x i32> [[TMP23]], <4 x i32> [[TMP43]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4276; UNROLL-NO-IC-NEXT:    [[TMP46]] = add <4 x i32> [[VEC_PHI]], [[TMP44]]
4277; UNROLL-NO-IC-NEXT:    [[TMP47]] = add <4 x i32> [[VEC_PHI1]], [[TMP45]]
4278; UNROLL-NO-IC-NEXT:    [[TMP48:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]]
4279; UNROLL-NO-IC-NEXT:    [[TMP49:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI1]]
4280; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
4281; UNROLL-NO-IC-NEXT:    [[TMP50:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4282; UNROLL-NO-IC-NEXT:    br i1 [[TMP50]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24:![0-9]+]], !llvm.loop [[LOOP25:![0-9]+]]
4283; UNROLL-NO-IC:       middle.block:
4284; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP49]], [[TMP48]]
4285; UNROLL-NO-IC-NEXT:    [[TMP51:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
4286; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP43]], i32 3
4287; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP43]], i32 2
4288; UNROLL-NO-IC-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4289; UNROLL-NO-IC:       scalar.ph:
4290; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
4291; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
4292; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
4293; UNROLL-NO-IC-NEXT:    br label [[BB2:%.*]]
4294; UNROLL-NO-IC:       bb1:
4295; UNROLL-NO-IC-NEXT:    [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
4296; UNROLL-NO-IC-NEXT:    ret i32 [[VAR]]
4297; UNROLL-NO-IC:       bb2:
4298; UNROLL-NO-IC-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4299; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
4300; UNROLL-NO-IC-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
4301; UNROLL-NO-IC-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
4302; UNROLL-NO-IC-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
4303; UNROLL-NO-IC-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
4304; UNROLL-NO-IC-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
4305; UNROLL-NO-IC-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF26:![0-9]+]], !llvm.loop [[LOOP27:![0-9]+]]
4306;
4307; UNROLL-NO-VF-LABEL: @sink_into_replication_region(
4308; UNROLL-NO-VF-NEXT:  bb:
4309; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
4310; UNROLL-NO-VF-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
4311; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
4312; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4313; UNROLL-NO-VF:       vector.ph:
4314; UNROLL-NO-VF-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
4315; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
4316; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
4317; UNROLL-NO-VF-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
4318; UNROLL-NO-VF-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
4319; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
4320; UNROLL-NO-VF:       vector.body:
4321; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE5:%.*]] ]
4322; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[PRED_UDIV_CONTINUE5]] ]
4323; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[PRED_UDIV_CONTINUE5]] ]
4324; UNROLL-NO-VF-NEXT:    [[VEC_PHI1:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_UDIV_CONTINUE5]] ]
4325; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4326; UNROLL-NO-VF-NEXT:    [[VEC_IV:%.*]] = add i32 [[INDEX]], 0
4327; UNROLL-NO-VF-NEXT:    [[VEC_IV3:%.*]] = add i32 [[INDEX]], 1
4328; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]]
4329; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[VEC_IV3]], [[TRIP_COUNT_MINUS_1]]
4330; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4331; UNROLL-NO-VF:       pred.udiv.if:
4332; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i32 [[OFFSET_IDX]], 0
4333; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = udiv i32 219220132, [[INDUCTION]]
4334; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4335; UNROLL-NO-VF:       pred.udiv.continue:
4336; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ]
4337; UNROLL-NO-VF-NEXT:    br i1 [[TMP3]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5]]
4338; UNROLL-NO-VF:       pred.udiv.if4:
4339; UNROLL-NO-VF-NEXT:    [[INDUCTION2:%.*]] = add i32 [[OFFSET_IDX]], -1
4340; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION2]]
4341; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE5]]
4342; UNROLL-NO-VF:       pred.udiv.continue5:
4343; UNROLL-NO-VF-NEXT:    [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF4]] ]
4344; UNROLL-NO-VF-NEXT:    [[TMP8]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]]
4345; UNROLL-NO-VF-NEXT:    [[TMP9]] = add i32 [[VEC_PHI1]], [[TMP5]]
4346; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = select i1 [[TMP2]], i32 [[TMP8]], i32 [[VEC_PHI]]
4347; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI1]]
4348; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
4349; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4350; UNROLL-NO-VF-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF23:![0-9]+]], !llvm.loop [[LOOP24:![0-9]+]]
4351; UNROLL-NO-VF:       middle.block:
4352; UNROLL-NO-VF-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP11]], [[TMP10]]
4353; UNROLL-NO-VF-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4354; UNROLL-NO-VF:       scalar.ph:
4355; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
4356; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
4357; UNROLL-NO-VF-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
4358; UNROLL-NO-VF-NEXT:    br label [[BB2:%.*]]
4359; UNROLL-NO-VF:       bb1:
4360; UNROLL-NO-VF-NEXT:    [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
4361; UNROLL-NO-VF-NEXT:    ret i32 [[VAR]]
4362; UNROLL-NO-VF:       bb2:
4363; UNROLL-NO-VF-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4364; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
4365; UNROLL-NO-VF-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
4366; UNROLL-NO-VF-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
4367; UNROLL-NO-VF-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
4368; UNROLL-NO-VF-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
4369; UNROLL-NO-VF-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
4370; UNROLL-NO-VF-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF25:![0-9]+]], !llvm.loop [[LOOP26:![0-9]+]]
4371;
4372; SINK-AFTER-LABEL: @sink_into_replication_region(
4373; SINK-AFTER-NEXT:  bb:
4374; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
4375; SINK-AFTER-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
4376; SINK-AFTER-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
4377; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4378; SINK-AFTER:       vector.ph:
4379; SINK-AFTER-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
4380; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
4381; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
4382; SINK-AFTER-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
4383; SINK-AFTER-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
4384; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
4385; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4386; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
4387; SINK-AFTER:       vector.body:
4388; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_UDIV_CONTINUE8:%.*]] ]
4389; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_UDIV_CONTINUE8]] ]
4390; SINK-AFTER-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_UDIV_CONTINUE8]] ]
4391; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4392; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT1:%.*]] = insertelement <4 x i32> poison, i32 [[INDEX]], i32 0
4393; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT2:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT1]], <4 x i32> poison, <4 x i32> zeroinitializer
4394; SINK-AFTER-NEXT:    [[VEC_IV:%.*]] = add <4 x i32> [[BROADCAST_SPLAT2]], <i32 0, i32 1, i32 2, i32 3>
4395; SINK-AFTER-NEXT:    [[TMP2:%.*]] = icmp ule <4 x i32> [[VEC_IV]], [[BROADCAST_SPLAT]]
4396; SINK-AFTER-NEXT:    [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i32 0
4397; SINK-AFTER-NEXT:    br i1 [[TMP3]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4398; SINK-AFTER:       pred.udiv.if:
4399; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], 0
4400; SINK-AFTER-NEXT:    [[TMP5:%.*]] = udiv i32 219220132, [[TMP4]]
4401; SINK-AFTER-NEXT:    [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i32 0
4402; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4403; SINK-AFTER:       pred.udiv.continue:
4404; SINK-AFTER-NEXT:    [[TMP7:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_UDIV_IF]] ]
4405; SINK-AFTER-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i32 1
4406; SINK-AFTER-NEXT:    br i1 [[TMP8]], label [[PRED_UDIV_IF3:%.*]], label [[PRED_UDIV_CONTINUE4:%.*]]
4407; SINK-AFTER:       pred.udiv.if3:
4408; SINK-AFTER-NEXT:    [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -1
4409; SINK-AFTER-NEXT:    [[TMP10:%.*]] = udiv i32 219220132, [[TMP9]]
4410; SINK-AFTER-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP10]], i32 1
4411; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE4]]
4412; SINK-AFTER:       pred.udiv.continue4:
4413; SINK-AFTER-NEXT:    [[TMP12:%.*]] = phi <4 x i32> [ [[TMP7]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP11]], [[PRED_UDIV_IF3]] ]
4414; SINK-AFTER-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP2]], i32 2
4415; SINK-AFTER-NEXT:    br i1 [[TMP13]], label [[PRED_UDIV_IF5:%.*]], label [[PRED_UDIV_CONTINUE6:%.*]]
4416; SINK-AFTER:       pred.udiv.if5:
4417; SINK-AFTER-NEXT:    [[TMP14:%.*]] = add i32 [[OFFSET_IDX]], -2
4418; SINK-AFTER-NEXT:    [[TMP15:%.*]] = udiv i32 219220132, [[TMP14]]
4419; SINK-AFTER-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i32 2
4420; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE6]]
4421; SINK-AFTER:       pred.udiv.continue6:
4422; SINK-AFTER-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE4]] ], [ [[TMP16]], [[PRED_UDIV_IF5]] ]
4423; SINK-AFTER-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP2]], i32 3
4424; SINK-AFTER-NEXT:    br i1 [[TMP18]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8]]
4425; SINK-AFTER:       pred.udiv.if7:
4426; SINK-AFTER-NEXT:    [[TMP19:%.*]] = add i32 [[OFFSET_IDX]], -3
4427; SINK-AFTER-NEXT:    [[TMP20:%.*]] = udiv i32 219220132, [[TMP19]]
4428; SINK-AFTER-NEXT:    [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i32 3
4429; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE8]]
4430; SINK-AFTER:       pred.udiv.continue8:
4431; SINK-AFTER-NEXT:    [[TMP22]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE6]] ], [ [[TMP21]], [[PRED_UDIV_IF7]] ]
4432; SINK-AFTER-NEXT:    [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4433; SINK-AFTER-NEXT:    [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]]
4434; SINK-AFTER-NEXT:    [[TMP25:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
4435; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
4436; SINK-AFTER-NEXT:    [[TMP26:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4437; SINK-AFTER-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24:![0-9]+]], !llvm.loop [[LOOP25:![0-9]+]]
4438; SINK-AFTER:       middle.block:
4439; SINK-AFTER-NEXT:    [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP25]])
4440; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3
4441; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP22]], i32 2
4442; SINK-AFTER-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4443; SINK-AFTER:       scalar.ph:
4444; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
4445; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
4446; SINK-AFTER-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
4447; SINK-AFTER-NEXT:    br label [[BB2:%.*]]
4448; SINK-AFTER:       bb1:
4449; SINK-AFTER-NEXT:    [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ]
4450; SINK-AFTER-NEXT:    ret i32 [[VAR]]
4451; SINK-AFTER:       bb2:
4452; SINK-AFTER-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4453; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
4454; SINK-AFTER-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
4455; SINK-AFTER-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
4456; SINK-AFTER-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
4457; SINK-AFTER-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
4458; SINK-AFTER-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
4459; SINK-AFTER-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF26:![0-9]+]], !llvm.loop [[LOOP27:![0-9]+]]
4460;
4461bb:
4462  br label %bb2
4463
4464  bb1:                                              ; preds = %bb2
4465  %var = phi i32 [ %var6, %bb2 ]
4466  ret i32 %var
4467
4468  bb2:                                              ; preds = %bb2, %bb
4469  %var3 = phi i32 [ %var8, %bb2 ], [ %y, %bb ]
4470  %var4 = phi i32 [ %var7, %bb2 ], [ 0, %bb ]
4471  %var5 = phi i32 [ %var6, %bb2 ], [ 0, %bb ]
4472  %var6 = add i32 %var5, %var4
4473  %var7 = udiv i32 219220132, %var3
4474  %var8 = add nsw i32 %var3, -1
4475  %var9 = icmp slt i32 %var3, 2
4476  br i1 %var9, label %bb1, label %bb2, !prof !2
4477}
4478
4479define i32 @sink_into_replication_region_multiple(i32 *%x, i32 %y) {
4480;
4481; CHECK-LABEL: @sink_into_replication_region_multiple(
4482; CHECK-NEXT:  bb:
4483; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4484; CHECK:       vector.ph:
4485; CHECK-NEXT:    [[TMP0:%.*]] = call i32 @llvm.smax.i32(i32 [[Y:%.*]], i32 1)
4486; CHECK-NEXT:    [[N_RND_UP:%.*]] = add nuw i32 [[TMP0]], 3
4487; CHECK-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -4
4488; CHECK-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[TMP0]], -1
4489; CHECK-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
4490; CHECK-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4491; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
4492; CHECK:       vector.body:
4493; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE13:%.*]] ]
4494; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE13]] ]
4495; CHECK-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP20:%.*]], [[PRED_STORE_CONTINUE13]] ]
4496; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_STORE_CONTINUE13]] ]
4497; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4498; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], -1
4499; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], -2
4500; CHECK-NEXT:    [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -3
4501; CHECK-NEXT:    [[TMP4:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
4502; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0
4503; CHECK-NEXT:    br i1 [[TMP5]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4504; CHECK:       pred.udiv.if:
4505; CHECK-NEXT:    [[TMP6:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
4506; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <4 x i32> poison, i32 [[TMP6]], i64 0
4507; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4508; CHECK:       pred.udiv.continue:
4509; CHECK-NEXT:    [[TMP8:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP7]], [[PRED_UDIV_IF]] ]
4510; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1
4511; CHECK-NEXT:    br i1 [[TMP9]], label [[PRED_UDIV_IF2:%.*]], label [[PRED_UDIV_CONTINUE3:%.*]]
4512; CHECK:       pred.udiv.if2:
4513; CHECK-NEXT:    [[TMP10:%.*]] = udiv i32 219220132, [[TMP1]]
4514; CHECK-NEXT:    [[TMP11:%.*]] = insertelement <4 x i32> [[TMP8]], i32 [[TMP10]], i64 1
4515; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE3]]
4516; CHECK:       pred.udiv.continue3:
4517; CHECK-NEXT:    [[TMP12:%.*]] = phi <4 x i32> [ [[TMP8]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP11]], [[PRED_UDIV_IF2]] ]
4518; CHECK-NEXT:    [[TMP13:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2
4519; CHECK-NEXT:    br i1 [[TMP13]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
4520; CHECK:       pred.udiv.if4:
4521; CHECK-NEXT:    [[TMP14:%.*]] = udiv i32 219220132, [[TMP2]]
4522; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP14]], i64 2
4523; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE5]]
4524; CHECK:       pred.udiv.continue5:
4525; CHECK-NEXT:    [[TMP16:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_UDIV_CONTINUE3]] ], [ [[TMP15]], [[PRED_UDIV_IF4]] ]
4526; CHECK-NEXT:    [[TMP17:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3
4527; CHECK-NEXT:    br i1 [[TMP17]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
4528; CHECK:       pred.udiv.if6:
4529; CHECK-NEXT:    [[TMP18:%.*]] = udiv i32 219220132, [[TMP3]]
4530; CHECK-NEXT:    [[TMP19:%.*]] = insertelement <4 x i32> [[TMP16]], i32 [[TMP18]], i64 3
4531; CHECK-NEXT:    br label [[PRED_UDIV_CONTINUE7]]
4532; CHECK:       pred.udiv.continue7:
4533; CHECK-NEXT:    [[TMP20]] = phi <4 x i32> [ [[TMP16]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP19]], [[PRED_UDIV_IF6]] ]
4534; CHECK-NEXT:    [[TMP21:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP20]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4535; CHECK-NEXT:    [[TMP22]] = add <4 x i32> [[VEC_PHI]], [[TMP21]]
4536; CHECK-NEXT:    [[TMP23:%.*]] = extractelement <4 x i1> [[TMP4]], i64 0
4537; CHECK-NEXT:    br i1 [[TMP23]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
4538; CHECK:       pred.store.if:
4539; CHECK-NEXT:    [[TMP24:%.*]] = sext i32 [[INDEX]] to i64
4540; CHECK-NEXT:    [[TMP25:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 [[TMP24]]
4541; CHECK-NEXT:    store i32 [[OFFSET_IDX]], i32* [[TMP25]], align 4
4542; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE]]
4543; CHECK:       pred.store.continue:
4544; CHECK-NEXT:    [[TMP26:%.*]] = extractelement <4 x i1> [[TMP4]], i64 1
4545; CHECK-NEXT:    br i1 [[TMP26]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
4546; CHECK:       pred.store.if8:
4547; CHECK-NEXT:    [[TMP27:%.*]] = or i32 [[INDEX]], 1
4548; CHECK-NEXT:    [[TMP28:%.*]] = sext i32 [[TMP27]] to i64
4549; CHECK-NEXT:    [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP28]]
4550; CHECK-NEXT:    store i32 [[TMP1]], i32* [[TMP29]], align 4
4551; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE9]]
4552; CHECK:       pred.store.continue9:
4553; CHECK-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP4]], i64 2
4554; CHECK-NEXT:    br i1 [[TMP30]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
4555; CHECK:       pred.store.if10:
4556; CHECK-NEXT:    [[TMP31:%.*]] = or i32 [[INDEX]], 2
4557; CHECK-NEXT:    [[TMP32:%.*]] = sext i32 [[TMP31]] to i64
4558; CHECK-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP32]]
4559; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP33]], align 4
4560; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE11]]
4561; CHECK:       pred.store.continue11:
4562; CHECK-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP4]], i64 3
4563; CHECK-NEXT:    br i1 [[TMP34]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13]]
4564; CHECK:       pred.store.if12:
4565; CHECK-NEXT:    [[TMP35:%.*]] = or i32 [[INDEX]], 3
4566; CHECK-NEXT:    [[TMP36:%.*]] = sext i32 [[TMP35]] to i64
4567; CHECK-NEXT:    [[TMP37:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP36]]
4568; CHECK-NEXT:    store i32 [[TMP3]], i32* [[TMP37]], align 4
4569; CHECK-NEXT:    br label [[PRED_STORE_CONTINUE13]]
4570; CHECK:       pred.store.continue13:
4571; CHECK-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
4572; CHECK-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
4573; CHECK-NEXT:    [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4574; CHECK-NEXT:    br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24]], !llvm.loop [[LOOP28:![0-9]+]]
4575; CHECK:       middle.block:
4576; CHECK-NEXT:    [[TMP39:%.*]] = select <4 x i1> [[TMP4]], <4 x i32> [[TMP22]], <4 x i32> [[VEC_PHI]]
4577; CHECK-NEXT:    [[TMP40:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP39]])
4578; CHECK-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4579; CHECK:       scalar.ph:
4580; CHECK-NEXT:    br label [[BB2:%.*]]
4581; CHECK:       bb1:
4582; CHECK-NEXT:    [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP40]], [[MIDDLE_BLOCK]] ]
4583; CHECK-NEXT:    ret i32 [[VAR]]
4584; CHECK:       bb2:
4585; CHECK-NEXT:    br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF26]], !llvm.loop [[LOOP29:![0-9]+]]
4586;
4587; UNROLL-LABEL: @sink_into_replication_region_multiple(
4588; UNROLL-NEXT:  bb:
4589; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4590; UNROLL:       vector.ph:
4591; UNROLL-NEXT:    [[TMP0:%.*]] = call i32 @llvm.smax.i32(i32 [[Y:%.*]], i32 1)
4592; UNROLL-NEXT:    [[N_RND_UP:%.*]] = add nuw i32 [[TMP0]], 7
4593; UNROLL-NEXT:    [[N_VEC:%.*]] = and i32 [[N_RND_UP]], -8
4594; UNROLL-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = add nsw i32 [[TMP0]], -1
4595; UNROLL-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i64 0
4596; UNROLL-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4597; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
4598; UNROLL:       vector.body:
4599; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE31:%.*]] ]
4600; UNROLL-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE31]] ]
4601; UNROLL-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP41:%.*]], [[PRED_STORE_CONTINUE31]] ]
4602; UNROLL-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP44:%.*]], [[PRED_STORE_CONTINUE31]] ]
4603; UNROLL-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP45:%.*]], [[PRED_STORE_CONTINUE31]] ]
4604; UNROLL-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
4605; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4606; UNROLL-NEXT:    [[TMP1:%.*]] = add i32 [[OFFSET_IDX]], -1
4607; UNROLL-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], -2
4608; UNROLL-NEXT:    [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -3
4609; UNROLL-NEXT:    [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -4
4610; UNROLL-NEXT:    [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -5
4611; UNROLL-NEXT:    [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], -6
4612; UNROLL-NEXT:    [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -7
4613; UNROLL-NEXT:    [[TMP8:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
4614; UNROLL-NEXT:    [[TMP9:%.*]] = icmp ule <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]]
4615; UNROLL-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP8]], i64 0
4616; UNROLL-NEXT:    br i1 [[TMP10]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4617; UNROLL:       pred.udiv.if:
4618; UNROLL-NEXT:    [[TMP11:%.*]] = udiv i32 219220132, [[OFFSET_IDX]]
4619; UNROLL-NEXT:    [[TMP12:%.*]] = insertelement <4 x i32> poison, i32 [[TMP11]], i64 0
4620; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4621; UNROLL:       pred.udiv.continue:
4622; UNROLL-NEXT:    [[TMP13:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP12]], [[PRED_UDIV_IF]] ]
4623; UNROLL-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP8]], i64 1
4624; UNROLL-NEXT:    br i1 [[TMP14]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
4625; UNROLL:       pred.udiv.if4:
4626; UNROLL-NEXT:    [[TMP15:%.*]] = udiv i32 219220132, [[TMP1]]
4627; UNROLL-NEXT:    [[TMP16:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP15]], i64 1
4628; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE5]]
4629; UNROLL:       pred.udiv.continue5:
4630; UNROLL-NEXT:    [[TMP17:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP16]], [[PRED_UDIV_IF4]] ]
4631; UNROLL-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP8]], i64 2
4632; UNROLL-NEXT:    br i1 [[TMP18]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
4633; UNROLL:       pred.udiv.if6:
4634; UNROLL-NEXT:    [[TMP19:%.*]] = udiv i32 219220132, [[TMP2]]
4635; UNROLL-NEXT:    [[TMP20:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP19]], i64 2
4636; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE7]]
4637; UNROLL:       pred.udiv.continue7:
4638; UNROLL-NEXT:    [[TMP21:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP20]], [[PRED_UDIV_IF6]] ]
4639; UNROLL-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP8]], i64 3
4640; UNROLL-NEXT:    br i1 [[TMP22]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
4641; UNROLL:       pred.udiv.if8:
4642; UNROLL-NEXT:    [[TMP23:%.*]] = udiv i32 219220132, [[TMP3]]
4643; UNROLL-NEXT:    [[TMP24:%.*]] = insertelement <4 x i32> [[TMP21]], i32 [[TMP23]], i64 3
4644; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE9]]
4645; UNROLL:       pred.udiv.continue9:
4646; UNROLL-NEXT:    [[TMP25:%.*]] = phi <4 x i32> [ [[TMP21]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP24]], [[PRED_UDIV_IF8]] ]
4647; UNROLL-NEXT:    [[TMP26:%.*]] = extractelement <4 x i1> [[TMP9]], i64 0
4648; UNROLL-NEXT:    br i1 [[TMP26]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]]
4649; UNROLL:       pred.udiv.if10:
4650; UNROLL-NEXT:    [[TMP27:%.*]] = udiv i32 219220132, [[TMP4]]
4651; UNROLL-NEXT:    [[TMP28:%.*]] = insertelement <4 x i32> poison, i32 [[TMP27]], i64 0
4652; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE11]]
4653; UNROLL:       pred.udiv.continue11:
4654; UNROLL-NEXT:    [[TMP29:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE9]] ], [ [[TMP28]], [[PRED_UDIV_IF10]] ]
4655; UNROLL-NEXT:    [[TMP30:%.*]] = extractelement <4 x i1> [[TMP9]], i64 1
4656; UNROLL-NEXT:    br i1 [[TMP30]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]]
4657; UNROLL:       pred.udiv.if12:
4658; UNROLL-NEXT:    [[TMP31:%.*]] = udiv i32 219220132, [[TMP5]]
4659; UNROLL-NEXT:    [[TMP32:%.*]] = insertelement <4 x i32> [[TMP29]], i32 [[TMP31]], i64 1
4660; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE13]]
4661; UNROLL:       pred.udiv.continue13:
4662; UNROLL-NEXT:    [[TMP33:%.*]] = phi <4 x i32> [ [[TMP29]], [[PRED_UDIV_CONTINUE11]] ], [ [[TMP32]], [[PRED_UDIV_IF12]] ]
4663; UNROLL-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP9]], i64 2
4664; UNROLL-NEXT:    br i1 [[TMP34]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]]
4665; UNROLL:       pred.udiv.if14:
4666; UNROLL-NEXT:    [[TMP35:%.*]] = udiv i32 219220132, [[TMP6]]
4667; UNROLL-NEXT:    [[TMP36:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP35]], i64 2
4668; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE15]]
4669; UNROLL:       pred.udiv.continue15:
4670; UNROLL-NEXT:    [[TMP37:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_UDIV_CONTINUE13]] ], [ [[TMP36]], [[PRED_UDIV_IF14]] ]
4671; UNROLL-NEXT:    [[TMP38:%.*]] = extractelement <4 x i1> [[TMP9]], i64 3
4672; UNROLL-NEXT:    br i1 [[TMP38]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]]
4673; UNROLL:       pred.udiv.if16:
4674; UNROLL-NEXT:    [[TMP39:%.*]] = udiv i32 219220132, [[TMP7]]
4675; UNROLL-NEXT:    [[TMP40:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP39]], i64 3
4676; UNROLL-NEXT:    br label [[PRED_UDIV_CONTINUE17]]
4677; UNROLL:       pred.udiv.continue17:
4678; UNROLL-NEXT:    [[TMP41]] = phi <4 x i32> [ [[TMP37]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP40]], [[PRED_UDIV_IF16]] ]
4679; UNROLL-NEXT:    [[TMP42:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP25]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4680; UNROLL-NEXT:    [[TMP43:%.*]] = shufflevector <4 x i32> [[TMP25]], <4 x i32> [[TMP41]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4681; UNROLL-NEXT:    [[TMP44]] = add <4 x i32> [[VEC_PHI]], [[TMP42]]
4682; UNROLL-NEXT:    [[TMP45]] = add <4 x i32> [[VEC_PHI3]], [[TMP43]]
4683; UNROLL-NEXT:    [[TMP46:%.*]] = extractelement <4 x i1> [[TMP8]], i64 0
4684; UNROLL-NEXT:    br i1 [[TMP46]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
4685; UNROLL:       pred.store.if:
4686; UNROLL-NEXT:    [[TMP47:%.*]] = sext i32 [[INDEX]] to i64
4687; UNROLL-NEXT:    [[TMP48:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i64 [[TMP47]]
4688; UNROLL-NEXT:    store i32 [[OFFSET_IDX]], i32* [[TMP48]], align 4
4689; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE]]
4690; UNROLL:       pred.store.continue:
4691; UNROLL-NEXT:    [[TMP49:%.*]] = extractelement <4 x i1> [[TMP8]], i64 1
4692; UNROLL-NEXT:    br i1 [[TMP49]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]]
4693; UNROLL:       pred.store.if18:
4694; UNROLL-NEXT:    [[TMP50:%.*]] = or i32 [[INDEX]], 1
4695; UNROLL-NEXT:    [[TMP51:%.*]] = sext i32 [[TMP50]] to i64
4696; UNROLL-NEXT:    [[TMP52:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP51]]
4697; UNROLL-NEXT:    store i32 [[TMP1]], i32* [[TMP52]], align 4
4698; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE19]]
4699; UNROLL:       pred.store.continue19:
4700; UNROLL-NEXT:    [[TMP53:%.*]] = extractelement <4 x i1> [[TMP8]], i64 2
4701; UNROLL-NEXT:    br i1 [[TMP53]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]]
4702; UNROLL:       pred.store.if20:
4703; UNROLL-NEXT:    [[TMP54:%.*]] = or i32 [[INDEX]], 2
4704; UNROLL-NEXT:    [[TMP55:%.*]] = sext i32 [[TMP54]] to i64
4705; UNROLL-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP55]]
4706; UNROLL-NEXT:    store i32 [[TMP2]], i32* [[TMP56]], align 4
4707; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE21]]
4708; UNROLL:       pred.store.continue21:
4709; UNROLL-NEXT:    [[TMP57:%.*]] = extractelement <4 x i1> [[TMP8]], i64 3
4710; UNROLL-NEXT:    br i1 [[TMP57]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
4711; UNROLL:       pred.store.if22:
4712; UNROLL-NEXT:    [[TMP58:%.*]] = or i32 [[INDEX]], 3
4713; UNROLL-NEXT:    [[TMP59:%.*]] = sext i32 [[TMP58]] to i64
4714; UNROLL-NEXT:    [[TMP60:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP59]]
4715; UNROLL-NEXT:    store i32 [[TMP3]], i32* [[TMP60]], align 4
4716; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE23]]
4717; UNROLL:       pred.store.continue23:
4718; UNROLL-NEXT:    [[TMP61:%.*]] = extractelement <4 x i1> [[TMP9]], i64 0
4719; UNROLL-NEXT:    br i1 [[TMP61]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]]
4720; UNROLL:       pred.store.if24:
4721; UNROLL-NEXT:    [[TMP62:%.*]] = or i32 [[INDEX]], 4
4722; UNROLL-NEXT:    [[TMP63:%.*]] = sext i32 [[TMP62]] to i64
4723; UNROLL-NEXT:    [[TMP64:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP63]]
4724; UNROLL-NEXT:    store i32 [[TMP4]], i32* [[TMP64]], align 4
4725; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE25]]
4726; UNROLL:       pred.store.continue25:
4727; UNROLL-NEXT:    [[TMP65:%.*]] = extractelement <4 x i1> [[TMP9]], i64 1
4728; UNROLL-NEXT:    br i1 [[TMP65]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]]
4729; UNROLL:       pred.store.if26:
4730; UNROLL-NEXT:    [[TMP66:%.*]] = or i32 [[INDEX]], 5
4731; UNROLL-NEXT:    [[TMP67:%.*]] = sext i32 [[TMP66]] to i64
4732; UNROLL-NEXT:    [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP67]]
4733; UNROLL-NEXT:    store i32 [[TMP5]], i32* [[TMP68]], align 4
4734; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE27]]
4735; UNROLL:       pred.store.continue27:
4736; UNROLL-NEXT:    [[TMP69:%.*]] = extractelement <4 x i1> [[TMP9]], i64 2
4737; UNROLL-NEXT:    br i1 [[TMP69]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]]
4738; UNROLL:       pred.store.if28:
4739; UNROLL-NEXT:    [[TMP70:%.*]] = or i32 [[INDEX]], 6
4740; UNROLL-NEXT:    [[TMP71:%.*]] = sext i32 [[TMP70]] to i64
4741; UNROLL-NEXT:    [[TMP72:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP71]]
4742; UNROLL-NEXT:    store i32 [[TMP6]], i32* [[TMP72]], align 4
4743; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE29]]
4744; UNROLL:       pred.store.continue29:
4745; UNROLL-NEXT:    [[TMP73:%.*]] = extractelement <4 x i1> [[TMP9]], i64 3
4746; UNROLL-NEXT:    br i1 [[TMP73]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31]]
4747; UNROLL:       pred.store.if30:
4748; UNROLL-NEXT:    [[TMP74:%.*]] = or i32 [[INDEX]], 7
4749; UNROLL-NEXT:    [[TMP75:%.*]] = sext i32 [[TMP74]] to i64
4750; UNROLL-NEXT:    [[TMP76:%.*]] = getelementptr inbounds i32, i32* [[X]], i64 [[TMP75]]
4751; UNROLL-NEXT:    store i32 [[TMP7]], i32* [[TMP76]], align 4
4752; UNROLL-NEXT:    br label [[PRED_STORE_CONTINUE31]]
4753; UNROLL:       pred.store.continue31:
4754; UNROLL-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
4755; UNROLL-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 8, i32 8, i32 8, i32 8>
4756; UNROLL-NEXT:    [[TMP77:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4757; UNROLL-NEXT:    br i1 [[TMP77]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24]], !llvm.loop [[LOOP28:![0-9]+]]
4758; UNROLL:       middle.block:
4759; UNROLL-NEXT:    [[TMP78:%.*]] = select <4 x i1> [[TMP9]], <4 x i32> [[TMP45]], <4 x i32> [[VEC_PHI3]]
4760; UNROLL-NEXT:    [[TMP79:%.*]] = select <4 x i1> [[TMP8]], <4 x i32> [[TMP44]], <4 x i32> [[VEC_PHI]]
4761; UNROLL-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP78]], [[TMP79]]
4762; UNROLL-NEXT:    [[TMP80:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
4763; UNROLL-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4764; UNROLL:       scalar.ph:
4765; UNROLL-NEXT:    br label [[BB2:%.*]]
4766; UNROLL:       bb1:
4767; UNROLL-NEXT:    [[VAR:%.*]] = phi i32 [ undef, [[BB2]] ], [ [[TMP80]], [[MIDDLE_BLOCK]] ]
4768; UNROLL-NEXT:    ret i32 [[VAR]]
4769; UNROLL:       bb2:
4770; UNROLL-NEXT:    br i1 undef, label [[BB1]], label [[BB2]], !prof [[PROF26]], !llvm.loop [[LOOP29:![0-9]+]]
4771;
4772; UNROLL-NO-IC-LABEL: @sink_into_replication_region_multiple(
4773; UNROLL-NO-IC-NEXT:  bb:
4774; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
4775; UNROLL-NO-IC-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
4776; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
4777; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4778; UNROLL-NO-IC:       vector.ph:
4779; UNROLL-NO-IC-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 7
4780; UNROLL-NO-IC-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 8
4781; UNROLL-NO-IC-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
4782; UNROLL-NO-IC-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
4783; UNROLL-NO-IC-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
4784; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
4785; UNROLL-NO-IC-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
4786; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
4787; UNROLL-NO-IC:       vector.body:
4788; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE31:%.*]] ]
4789; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE31]] ]
4790; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP43:%.*]], [[PRED_STORE_CONTINUE31]] ]
4791; UNROLL-NO-IC-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP46:%.*]], [[PRED_STORE_CONTINUE31]] ]
4792; UNROLL-NO-IC-NEXT:    [[VEC_PHI3:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP47:%.*]], [[PRED_STORE_CONTINUE31]] ]
4793; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
4794; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4795; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
4796; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
4797; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2
4798; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3
4799; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = add i32 [[OFFSET_IDX]], -4
4800; UNROLL-NO-IC-NEXT:    [[TMP7:%.*]] = add i32 [[OFFSET_IDX]], -5
4801; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = add i32 [[OFFSET_IDX]], -6
4802; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = add i32 [[OFFSET_IDX]], -7
4803; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
4804; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = icmp ule <4 x i32> [[STEP_ADD]], [[BROADCAST_SPLAT]]
4805; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
4806; UNROLL-NO-IC-NEXT:    br i1 [[TMP12]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4807; UNROLL-NO-IC:       pred.udiv.if:
4808; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = udiv i32 219220132, [[TMP2]]
4809; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = insertelement <4 x i32> poison, i32 [[TMP13]], i32 0
4810; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE]]
4811; UNROLL-NO-IC:       pred.udiv.continue:
4812; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP14]], [[PRED_UDIV_IF]] ]
4813; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1
4814; UNROLL-NO-IC-NEXT:    br i1 [[TMP16]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
4815; UNROLL-NO-IC:       pred.udiv.if4:
4816; UNROLL-NO-IC-NEXT:    [[TMP17:%.*]] = udiv i32 219220132, [[TMP3]]
4817; UNROLL-NO-IC-NEXT:    [[TMP18:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP17]], i32 1
4818; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE5]]
4819; UNROLL-NO-IC:       pred.udiv.continue5:
4820; UNROLL-NO-IC-NEXT:    [[TMP19:%.*]] = phi <4 x i32> [ [[TMP15]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP18]], [[PRED_UDIV_IF4]] ]
4821; UNROLL-NO-IC-NEXT:    [[TMP20:%.*]] = extractelement <4 x i1> [[TMP10]], i32 2
4822; UNROLL-NO-IC-NEXT:    br i1 [[TMP20]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
4823; UNROLL-NO-IC:       pred.udiv.if6:
4824; UNROLL-NO-IC-NEXT:    [[TMP21:%.*]] = udiv i32 219220132, [[TMP4]]
4825; UNROLL-NO-IC-NEXT:    [[TMP22:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP21]], i32 2
4826; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE7]]
4827; UNROLL-NO-IC:       pred.udiv.continue7:
4828; UNROLL-NO-IC-NEXT:    [[TMP23:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP22]], [[PRED_UDIV_IF6]] ]
4829; UNROLL-NO-IC-NEXT:    [[TMP24:%.*]] = extractelement <4 x i1> [[TMP10]], i32 3
4830; UNROLL-NO-IC-NEXT:    br i1 [[TMP24]], label [[PRED_UDIV_IF8:%.*]], label [[PRED_UDIV_CONTINUE9:%.*]]
4831; UNROLL-NO-IC:       pred.udiv.if8:
4832; UNROLL-NO-IC-NEXT:    [[TMP25:%.*]] = udiv i32 219220132, [[TMP5]]
4833; UNROLL-NO-IC-NEXT:    [[TMP26:%.*]] = insertelement <4 x i32> [[TMP23]], i32 [[TMP25]], i32 3
4834; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE9]]
4835; UNROLL-NO-IC:       pred.udiv.continue9:
4836; UNROLL-NO-IC-NEXT:    [[TMP27:%.*]] = phi <4 x i32> [ [[TMP23]], [[PRED_UDIV_CONTINUE7]] ], [ [[TMP26]], [[PRED_UDIV_IF8]] ]
4837; UNROLL-NO-IC-NEXT:    [[TMP28:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0
4838; UNROLL-NO-IC-NEXT:    br i1 [[TMP28]], label [[PRED_UDIV_IF10:%.*]], label [[PRED_UDIV_CONTINUE11:%.*]]
4839; UNROLL-NO-IC:       pred.udiv.if10:
4840; UNROLL-NO-IC-NEXT:    [[TMP29:%.*]] = udiv i32 219220132, [[TMP6]]
4841; UNROLL-NO-IC-NEXT:    [[TMP30:%.*]] = insertelement <4 x i32> poison, i32 [[TMP29]], i32 0
4842; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE11]]
4843; UNROLL-NO-IC:       pred.udiv.continue11:
4844; UNROLL-NO-IC-NEXT:    [[TMP31:%.*]] = phi <4 x i32> [ poison, [[PRED_UDIV_CONTINUE9]] ], [ [[TMP30]], [[PRED_UDIV_IF10]] ]
4845; UNROLL-NO-IC-NEXT:    [[TMP32:%.*]] = extractelement <4 x i1> [[TMP11]], i32 1
4846; UNROLL-NO-IC-NEXT:    br i1 [[TMP32]], label [[PRED_UDIV_IF12:%.*]], label [[PRED_UDIV_CONTINUE13:%.*]]
4847; UNROLL-NO-IC:       pred.udiv.if12:
4848; UNROLL-NO-IC-NEXT:    [[TMP33:%.*]] = udiv i32 219220132, [[TMP7]]
4849; UNROLL-NO-IC-NEXT:    [[TMP34:%.*]] = insertelement <4 x i32> [[TMP31]], i32 [[TMP33]], i32 1
4850; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE13]]
4851; UNROLL-NO-IC:       pred.udiv.continue13:
4852; UNROLL-NO-IC-NEXT:    [[TMP35:%.*]] = phi <4 x i32> [ [[TMP31]], [[PRED_UDIV_CONTINUE11]] ], [ [[TMP34]], [[PRED_UDIV_IF12]] ]
4853; UNROLL-NO-IC-NEXT:    [[TMP36:%.*]] = extractelement <4 x i1> [[TMP11]], i32 2
4854; UNROLL-NO-IC-NEXT:    br i1 [[TMP36]], label [[PRED_UDIV_IF14:%.*]], label [[PRED_UDIV_CONTINUE15:%.*]]
4855; UNROLL-NO-IC:       pred.udiv.if14:
4856; UNROLL-NO-IC-NEXT:    [[TMP37:%.*]] = udiv i32 219220132, [[TMP8]]
4857; UNROLL-NO-IC-NEXT:    [[TMP38:%.*]] = insertelement <4 x i32> [[TMP35]], i32 [[TMP37]], i32 2
4858; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE15]]
4859; UNROLL-NO-IC:       pred.udiv.continue15:
4860; UNROLL-NO-IC-NEXT:    [[TMP39:%.*]] = phi <4 x i32> [ [[TMP35]], [[PRED_UDIV_CONTINUE13]] ], [ [[TMP38]], [[PRED_UDIV_IF14]] ]
4861; UNROLL-NO-IC-NEXT:    [[TMP40:%.*]] = extractelement <4 x i1> [[TMP11]], i32 3
4862; UNROLL-NO-IC-NEXT:    br i1 [[TMP40]], label [[PRED_UDIV_IF16:%.*]], label [[PRED_UDIV_CONTINUE17:%.*]]
4863; UNROLL-NO-IC:       pred.udiv.if16:
4864; UNROLL-NO-IC-NEXT:    [[TMP41:%.*]] = udiv i32 219220132, [[TMP9]]
4865; UNROLL-NO-IC-NEXT:    [[TMP42:%.*]] = insertelement <4 x i32> [[TMP39]], i32 [[TMP41]], i32 3
4866; UNROLL-NO-IC-NEXT:    br label [[PRED_UDIV_CONTINUE17]]
4867; UNROLL-NO-IC:       pred.udiv.continue17:
4868; UNROLL-NO-IC-NEXT:    [[TMP43]] = phi <4 x i32> [ [[TMP39]], [[PRED_UDIV_CONTINUE15]] ], [ [[TMP42]], [[PRED_UDIV_IF16]] ]
4869; UNROLL-NO-IC-NEXT:    [[TMP44:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP27]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4870; UNROLL-NO-IC-NEXT:    [[TMP45:%.*]] = shufflevector <4 x i32> [[TMP27]], <4 x i32> [[TMP43]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
4871; UNROLL-NO-IC-NEXT:    [[TMP46]] = add <4 x i32> [[VEC_PHI]], [[TMP44]]
4872; UNROLL-NO-IC-NEXT:    [[TMP47]] = add <4 x i32> [[VEC_PHI3]], [[TMP45]]
4873; UNROLL-NO-IC-NEXT:    [[TMP48:%.*]] = extractelement <4 x i1> [[TMP10]], i32 0
4874; UNROLL-NO-IC-NEXT:    br i1 [[TMP48]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
4875; UNROLL-NO-IC:       pred.store.if:
4876; UNROLL-NO-IC-NEXT:    [[TMP49:%.*]] = add i32 [[INDEX]], 0
4877; UNROLL-NO-IC-NEXT:    [[TMP50:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP49]]
4878; UNROLL-NO-IC-NEXT:    store i32 [[TMP2]], i32* [[TMP50]], align 4
4879; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE]]
4880; UNROLL-NO-IC:       pred.store.continue:
4881; UNROLL-NO-IC-NEXT:    [[TMP51:%.*]] = extractelement <4 x i1> [[TMP10]], i32 1
4882; UNROLL-NO-IC-NEXT:    br i1 [[TMP51]], label [[PRED_STORE_IF18:%.*]], label [[PRED_STORE_CONTINUE19:%.*]]
4883; UNROLL-NO-IC:       pred.store.if18:
4884; UNROLL-NO-IC-NEXT:    [[TMP52:%.*]] = add i32 [[INDEX]], 1
4885; UNROLL-NO-IC-NEXT:    [[TMP53:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP52]]
4886; UNROLL-NO-IC-NEXT:    store i32 [[TMP3]], i32* [[TMP53]], align 4
4887; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE19]]
4888; UNROLL-NO-IC:       pred.store.continue19:
4889; UNROLL-NO-IC-NEXT:    [[TMP54:%.*]] = extractelement <4 x i1> [[TMP10]], i32 2
4890; UNROLL-NO-IC-NEXT:    br i1 [[TMP54]], label [[PRED_STORE_IF20:%.*]], label [[PRED_STORE_CONTINUE21:%.*]]
4891; UNROLL-NO-IC:       pred.store.if20:
4892; UNROLL-NO-IC-NEXT:    [[TMP55:%.*]] = add i32 [[INDEX]], 2
4893; UNROLL-NO-IC-NEXT:    [[TMP56:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP55]]
4894; UNROLL-NO-IC-NEXT:    store i32 [[TMP4]], i32* [[TMP56]], align 4
4895; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE21]]
4896; UNROLL-NO-IC:       pred.store.continue21:
4897; UNROLL-NO-IC-NEXT:    [[TMP57:%.*]] = extractelement <4 x i1> [[TMP10]], i32 3
4898; UNROLL-NO-IC-NEXT:    br i1 [[TMP57]], label [[PRED_STORE_IF22:%.*]], label [[PRED_STORE_CONTINUE23:%.*]]
4899; UNROLL-NO-IC:       pred.store.if22:
4900; UNROLL-NO-IC-NEXT:    [[TMP58:%.*]] = add i32 [[INDEX]], 3
4901; UNROLL-NO-IC-NEXT:    [[TMP59:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP58]]
4902; UNROLL-NO-IC-NEXT:    store i32 [[TMP5]], i32* [[TMP59]], align 4
4903; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE23]]
4904; UNROLL-NO-IC:       pred.store.continue23:
4905; UNROLL-NO-IC-NEXT:    [[TMP60:%.*]] = extractelement <4 x i1> [[TMP11]], i32 0
4906; UNROLL-NO-IC-NEXT:    br i1 [[TMP60]], label [[PRED_STORE_IF24:%.*]], label [[PRED_STORE_CONTINUE25:%.*]]
4907; UNROLL-NO-IC:       pred.store.if24:
4908; UNROLL-NO-IC-NEXT:    [[TMP61:%.*]] = add i32 [[INDEX]], 4
4909; UNROLL-NO-IC-NEXT:    [[TMP62:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP61]]
4910; UNROLL-NO-IC-NEXT:    store i32 [[TMP6]], i32* [[TMP62]], align 4
4911; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE25]]
4912; UNROLL-NO-IC:       pred.store.continue25:
4913; UNROLL-NO-IC-NEXT:    [[TMP63:%.*]] = extractelement <4 x i1> [[TMP11]], i32 1
4914; UNROLL-NO-IC-NEXT:    br i1 [[TMP63]], label [[PRED_STORE_IF26:%.*]], label [[PRED_STORE_CONTINUE27:%.*]]
4915; UNROLL-NO-IC:       pred.store.if26:
4916; UNROLL-NO-IC-NEXT:    [[TMP64:%.*]] = add i32 [[INDEX]], 5
4917; UNROLL-NO-IC-NEXT:    [[TMP65:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP64]]
4918; UNROLL-NO-IC-NEXT:    store i32 [[TMP7]], i32* [[TMP65]], align 4
4919; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE27]]
4920; UNROLL-NO-IC:       pred.store.continue27:
4921; UNROLL-NO-IC-NEXT:    [[TMP66:%.*]] = extractelement <4 x i1> [[TMP11]], i32 2
4922; UNROLL-NO-IC-NEXT:    br i1 [[TMP66]], label [[PRED_STORE_IF28:%.*]], label [[PRED_STORE_CONTINUE29:%.*]]
4923; UNROLL-NO-IC:       pred.store.if28:
4924; UNROLL-NO-IC-NEXT:    [[TMP67:%.*]] = add i32 [[INDEX]], 6
4925; UNROLL-NO-IC-NEXT:    [[TMP68:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP67]]
4926; UNROLL-NO-IC-NEXT:    store i32 [[TMP8]], i32* [[TMP68]], align 4
4927; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE29]]
4928; UNROLL-NO-IC:       pred.store.continue29:
4929; UNROLL-NO-IC-NEXT:    [[TMP69:%.*]] = extractelement <4 x i1> [[TMP11]], i32 3
4930; UNROLL-NO-IC-NEXT:    br i1 [[TMP69]], label [[PRED_STORE_IF30:%.*]], label [[PRED_STORE_CONTINUE31]]
4931; UNROLL-NO-IC:       pred.store.if30:
4932; UNROLL-NO-IC-NEXT:    [[TMP70:%.*]] = add i32 [[INDEX]], 7
4933; UNROLL-NO-IC-NEXT:    [[TMP71:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP70]]
4934; UNROLL-NO-IC-NEXT:    store i32 [[TMP9]], i32* [[TMP71]], align 4
4935; UNROLL-NO-IC-NEXT:    br label [[PRED_STORE_CONTINUE31]]
4936; UNROLL-NO-IC:       pred.store.continue31:
4937; UNROLL-NO-IC-NEXT:    [[TMP72:%.*]] = select <4 x i1> [[TMP10]], <4 x i32> [[TMP46]], <4 x i32> [[VEC_PHI]]
4938; UNROLL-NO-IC-NEXT:    [[TMP73:%.*]] = select <4 x i1> [[TMP11]], <4 x i32> [[TMP47]], <4 x i32> [[VEC_PHI3]]
4939; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 8
4940; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[STEP_ADD]], <i32 4, i32 4, i32 4, i32 4>
4941; UNROLL-NO-IC-NEXT:    [[TMP74:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
4942; UNROLL-NO-IC-NEXT:    br i1 [[TMP74]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24]], !llvm.loop [[LOOP28:![0-9]+]]
4943; UNROLL-NO-IC:       middle.block:
4944; UNROLL-NO-IC-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP73]], [[TMP72]]
4945; UNROLL-NO-IC-NEXT:    [[TMP75:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[BIN_RDX]])
4946; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP43]], i32 3
4947; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP43]], i32 2
4948; UNROLL-NO-IC-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
4949; UNROLL-NO-IC:       scalar.ph:
4950; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
4951; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
4952; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
4953; UNROLL-NO-IC-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP75]], [[MIDDLE_BLOCK]] ]
4954; UNROLL-NO-IC-NEXT:    br label [[BB2:%.*]]
4955; UNROLL-NO-IC:       bb1:
4956; UNROLL-NO-IC-NEXT:    [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP75]], [[MIDDLE_BLOCK]] ]
4957; UNROLL-NO-IC-NEXT:    ret i32 [[VAR]]
4958; UNROLL-NO-IC:       bb2:
4959; UNROLL-NO-IC-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
4960; UNROLL-NO-IC-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
4961; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
4962; UNROLL-NO-IC-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
4963; UNROLL-NO-IC-NEXT:    [[G:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[IV]]
4964; UNROLL-NO-IC-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
4965; UNROLL-NO-IC-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
4966; UNROLL-NO-IC-NEXT:    store i32 [[VAR3]], i32* [[G]], align 4
4967; UNROLL-NO-IC-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
4968; UNROLL-NO-IC-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
4969; UNROLL-NO-IC-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
4970; UNROLL-NO-IC-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF26]], !llvm.loop [[LOOP29:![0-9]+]]
4971;
4972; UNROLL-NO-VF-LABEL: @sink_into_replication_region_multiple(
4973; UNROLL-NO-VF-NEXT:  bb:
4974; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
4975; UNROLL-NO-VF-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
4976; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
4977; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
4978; UNROLL-NO-VF:       vector.ph:
4979; UNROLL-NO-VF-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 1
4980; UNROLL-NO-VF-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 2
4981; UNROLL-NO-VF-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
4982; UNROLL-NO-VF-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
4983; UNROLL-NO-VF-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
4984; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
4985; UNROLL-NO-VF:       vector.body:
4986; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE10:%.*]] ]
4987; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[PRED_STORE_CONTINUE10]] ]
4988; UNROLL-NO-VF-NEXT:    [[VEC_PHI:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[PRED_STORE_CONTINUE10]] ]
4989; UNROLL-NO-VF-NEXT:    [[VEC_PHI2:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP9:%.*]], [[PRED_STORE_CONTINUE10]] ]
4990; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
4991; UNROLL-NO-VF-NEXT:    [[INDUCTION4:%.*]] = add i32 [[OFFSET_IDX]], 0
4992; UNROLL-NO-VF-NEXT:    [[INDUCTION5:%.*]] = add i32 [[OFFSET_IDX]], -1
4993; UNROLL-NO-VF-NEXT:    [[VEC_IV:%.*]] = add i32 [[INDEX]], 0
4994; UNROLL-NO-VF-NEXT:    [[VEC_IV6:%.*]] = add i32 [[INDEX]], 1
4995; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = icmp ule i32 [[VEC_IV]], [[TRIP_COUNT_MINUS_1]]
4996; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = icmp ule i32 [[VEC_IV6]], [[TRIP_COUNT_MINUS_1]]
4997; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
4998; UNROLL-NO-VF:       pred.udiv.if:
4999; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = udiv i32 219220132, [[INDUCTION4]]
5000; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE]]
5001; UNROLL-NO-VF:       pred.udiv.continue:
5002; UNROLL-NO-VF-NEXT:    [[TMP5:%.*]] = phi i32 [ poison, [[VECTOR_BODY]] ], [ [[TMP4]], [[PRED_UDIV_IF]] ]
5003; UNROLL-NO-VF-NEXT:    br i1 [[TMP3]], label [[PRED_UDIV_IF7:%.*]], label [[PRED_UDIV_CONTINUE8:%.*]]
5004; UNROLL-NO-VF:       pred.udiv.if7:
5005; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = udiv i32 219220132, [[INDUCTION5]]
5006; UNROLL-NO-VF-NEXT:    br label [[PRED_UDIV_CONTINUE8]]
5007; UNROLL-NO-VF:       pred.udiv.continue8:
5008; UNROLL-NO-VF-NEXT:    [[TMP7]] = phi i32 [ poison, [[PRED_UDIV_CONTINUE]] ], [ [[TMP6]], [[PRED_UDIV_IF7]] ]
5009; UNROLL-NO-VF-NEXT:    [[TMP8]] = add i32 [[VEC_PHI]], [[VECTOR_RECUR]]
5010; UNROLL-NO-VF-NEXT:    [[TMP9]] = add i32 [[VEC_PHI2]], [[TMP5]]
5011; UNROLL-NO-VF-NEXT:    br i1 [[TMP2]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
5012; UNROLL-NO-VF:       pred.store.if:
5013; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i32 [[INDEX]], 0
5014; UNROLL-NO-VF-NEXT:    [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[INDUCTION]]
5015; UNROLL-NO-VF-NEXT:    store i32 [[INDUCTION4]], i32* [[TMP10]], align 4
5016; UNROLL-NO-VF-NEXT:    br label [[PRED_STORE_CONTINUE]]
5017; UNROLL-NO-VF:       pred.store.continue:
5018; UNROLL-NO-VF-NEXT:    br i1 [[TMP3]], label [[PRED_STORE_IF9:%.*]], label [[PRED_STORE_CONTINUE10]]
5019; UNROLL-NO-VF:       pred.store.if9:
5020; UNROLL-NO-VF-NEXT:    [[INDUCTION3:%.*]] = add i32 [[INDEX]], 1
5021; UNROLL-NO-VF-NEXT:    [[TMP11:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[INDUCTION3]]
5022; UNROLL-NO-VF-NEXT:    store i32 [[INDUCTION5]], i32* [[TMP11]], align 4
5023; UNROLL-NO-VF-NEXT:    br label [[PRED_STORE_CONTINUE10]]
5024; UNROLL-NO-VF:       pred.store.continue10:
5025; UNROLL-NO-VF-NEXT:    [[TMP12:%.*]] = select i1 [[TMP2]], i32 [[TMP8]], i32 [[VEC_PHI]]
5026; UNROLL-NO-VF-NEXT:    [[TMP13:%.*]] = select i1 [[TMP3]], i32 [[TMP9]], i32 [[VEC_PHI2]]
5027; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 2
5028; UNROLL-NO-VF-NEXT:    [[TMP14:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5029; UNROLL-NO-VF-NEXT:    br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF23]], !llvm.loop [[LOOP27:![0-9]+]]
5030; UNROLL-NO-VF:       middle.block:
5031; UNROLL-NO-VF-NEXT:    [[BIN_RDX:%.*]] = add i32 [[TMP13]], [[TMP12]]
5032; UNROLL-NO-VF-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
5033; UNROLL-NO-VF:       scalar.ph:
5034; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
5035; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
5036; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
5037; UNROLL-NO-VF-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
5038; UNROLL-NO-VF-NEXT:    br label [[BB2:%.*]]
5039; UNROLL-NO-VF:       bb1:
5040; UNROLL-NO-VF-NEXT:    [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[BIN_RDX]], [[MIDDLE_BLOCK]] ]
5041; UNROLL-NO-VF-NEXT:    ret i32 [[VAR]]
5042; UNROLL-NO-VF:       bb2:
5043; UNROLL-NO-VF-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5044; UNROLL-NO-VF-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5045; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
5046; UNROLL-NO-VF-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
5047; UNROLL-NO-VF-NEXT:    [[G:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[IV]]
5048; UNROLL-NO-VF-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
5049; UNROLL-NO-VF-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
5050; UNROLL-NO-VF-NEXT:    store i32 [[VAR3]], i32* [[G]], align 4
5051; UNROLL-NO-VF-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
5052; UNROLL-NO-VF-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
5053; UNROLL-NO-VF-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
5054; UNROLL-NO-VF-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF25]], !llvm.loop [[LOOP28:![0-9]+]]
5055;
5056; SINK-AFTER-LABEL: @sink_into_replication_region_multiple(
5057; SINK-AFTER-NEXT:  bb:
5058; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i32 [[Y:%.*]], 1
5059; SINK-AFTER-NEXT:    [[SMIN:%.*]] = call i32 @llvm.smin.i32(i32 [[Y]], i32 1)
5060; SINK-AFTER-NEXT:    [[TMP1:%.*]] = sub i32 [[TMP0]], [[SMIN]]
5061; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5062; SINK-AFTER:       vector.ph:
5063; SINK-AFTER-NEXT:    [[N_RND_UP:%.*]] = add i32 [[TMP1]], 3
5064; SINK-AFTER-NEXT:    [[N_MOD_VF:%.*]] = urem i32 [[N_RND_UP]], 4
5065; SINK-AFTER-NEXT:    [[N_VEC:%.*]] = sub i32 [[N_RND_UP]], [[N_MOD_VF]]
5066; SINK-AFTER-NEXT:    [[IND_END:%.*]] = sub i32 [[Y]], [[N_VEC]]
5067; SINK-AFTER-NEXT:    [[TRIP_COUNT_MINUS_1:%.*]] = sub i32 [[TMP1]], 1
5068; SINK-AFTER-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[TRIP_COUNT_MINUS_1]], i32 0
5069; SINK-AFTER-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer
5070; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
5071; SINK-AFTER:       vector.body:
5072; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE13:%.*]] ]
5073; SINK-AFTER-NEXT:    [[VEC_IND:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_STORE_CONTINUE13]] ]
5074; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP22:%.*]], [[PRED_STORE_CONTINUE13]] ]
5075; SINK-AFTER-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP24:%.*]], [[PRED_STORE_CONTINUE13]] ]
5076; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = sub i32 [[Y]], [[INDEX]]
5077; SINK-AFTER-NEXT:    [[TMP2:%.*]] = add i32 [[OFFSET_IDX]], 0
5078; SINK-AFTER-NEXT:    [[TMP3:%.*]] = add i32 [[OFFSET_IDX]], -1
5079; SINK-AFTER-NEXT:    [[TMP4:%.*]] = add i32 [[OFFSET_IDX]], -2
5080; SINK-AFTER-NEXT:    [[TMP5:%.*]] = add i32 [[OFFSET_IDX]], -3
5081; SINK-AFTER-NEXT:    [[TMP6:%.*]] = icmp ule <4 x i32> [[VEC_IND]], [[BROADCAST_SPLAT]]
5082; SINK-AFTER-NEXT:    [[TMP7:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
5083; SINK-AFTER-NEXT:    br i1 [[TMP7]], label [[PRED_UDIV_IF:%.*]], label [[PRED_UDIV_CONTINUE:%.*]]
5084; SINK-AFTER:       pred.udiv.if:
5085; SINK-AFTER-NEXT:    [[TMP8:%.*]] = udiv i32 219220132, [[TMP2]]
5086; SINK-AFTER-NEXT:    [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i32 0
5087; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE]]
5088; SINK-AFTER:       pred.udiv.continue:
5089; SINK-AFTER-NEXT:    [[TMP10:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_UDIV_IF]] ]
5090; SINK-AFTER-NEXT:    [[TMP11:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
5091; SINK-AFTER-NEXT:    br i1 [[TMP11]], label [[PRED_UDIV_IF2:%.*]], label [[PRED_UDIV_CONTINUE3:%.*]]
5092; SINK-AFTER:       pred.udiv.if2:
5093; SINK-AFTER-NEXT:    [[TMP12:%.*]] = udiv i32 219220132, [[TMP3]]
5094; SINK-AFTER-NEXT:    [[TMP13:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP12]], i32 1
5095; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE3]]
5096; SINK-AFTER:       pred.udiv.continue3:
5097; SINK-AFTER-NEXT:    [[TMP14:%.*]] = phi <4 x i32> [ [[TMP10]], [[PRED_UDIV_CONTINUE]] ], [ [[TMP13]], [[PRED_UDIV_IF2]] ]
5098; SINK-AFTER-NEXT:    [[TMP15:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
5099; SINK-AFTER-NEXT:    br i1 [[TMP15]], label [[PRED_UDIV_IF4:%.*]], label [[PRED_UDIV_CONTINUE5:%.*]]
5100; SINK-AFTER:       pred.udiv.if4:
5101; SINK-AFTER-NEXT:    [[TMP16:%.*]] = udiv i32 219220132, [[TMP4]]
5102; SINK-AFTER-NEXT:    [[TMP17:%.*]] = insertelement <4 x i32> [[TMP14]], i32 [[TMP16]], i32 2
5103; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE5]]
5104; SINK-AFTER:       pred.udiv.continue5:
5105; SINK-AFTER-NEXT:    [[TMP18:%.*]] = phi <4 x i32> [ [[TMP14]], [[PRED_UDIV_CONTINUE3]] ], [ [[TMP17]], [[PRED_UDIV_IF4]] ]
5106; SINK-AFTER-NEXT:    [[TMP19:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
5107; SINK-AFTER-NEXT:    br i1 [[TMP19]], label [[PRED_UDIV_IF6:%.*]], label [[PRED_UDIV_CONTINUE7:%.*]]
5108; SINK-AFTER:       pred.udiv.if6:
5109; SINK-AFTER-NEXT:    [[TMP20:%.*]] = udiv i32 219220132, [[TMP5]]
5110; SINK-AFTER-NEXT:    [[TMP21:%.*]] = insertelement <4 x i32> [[TMP18]], i32 [[TMP20]], i32 3
5111; SINK-AFTER-NEXT:    br label [[PRED_UDIV_CONTINUE7]]
5112; SINK-AFTER:       pred.udiv.continue7:
5113; SINK-AFTER-NEXT:    [[TMP22]] = phi <4 x i32> [ [[TMP18]], [[PRED_UDIV_CONTINUE5]] ], [ [[TMP21]], [[PRED_UDIV_IF6]] ]
5114; SINK-AFTER-NEXT:    [[TMP23:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP22]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
5115; SINK-AFTER-NEXT:    [[TMP24]] = add <4 x i32> [[VEC_PHI]], [[TMP23]]
5116; SINK-AFTER-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP6]], i32 0
5117; SINK-AFTER-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
5118; SINK-AFTER:       pred.store.if:
5119; SINK-AFTER-NEXT:    [[TMP26:%.*]] = add i32 [[INDEX]], 0
5120; SINK-AFTER-NEXT:    [[TMP27:%.*]] = getelementptr inbounds i32, i32* [[X:%.*]], i32 [[TMP26]]
5121; SINK-AFTER-NEXT:    store i32 [[TMP2]], i32* [[TMP27]], align 4
5122; SINK-AFTER-NEXT:    br label [[PRED_STORE_CONTINUE]]
5123; SINK-AFTER:       pred.store.continue:
5124; SINK-AFTER-NEXT:    [[TMP28:%.*]] = extractelement <4 x i1> [[TMP6]], i32 1
5125; SINK-AFTER-NEXT:    br i1 [[TMP28]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
5126; SINK-AFTER:       pred.store.if8:
5127; SINK-AFTER-NEXT:    [[TMP29:%.*]] = add i32 [[INDEX]], 1
5128; SINK-AFTER-NEXT:    [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP29]]
5129; SINK-AFTER-NEXT:    store i32 [[TMP3]], i32* [[TMP30]], align 4
5130; SINK-AFTER-NEXT:    br label [[PRED_STORE_CONTINUE9]]
5131; SINK-AFTER:       pred.store.continue9:
5132; SINK-AFTER-NEXT:    [[TMP31:%.*]] = extractelement <4 x i1> [[TMP6]], i32 2
5133; SINK-AFTER-NEXT:    br i1 [[TMP31]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
5134; SINK-AFTER:       pred.store.if10:
5135; SINK-AFTER-NEXT:    [[TMP32:%.*]] = add i32 [[INDEX]], 2
5136; SINK-AFTER-NEXT:    [[TMP33:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP32]]
5137; SINK-AFTER-NEXT:    store i32 [[TMP4]], i32* [[TMP33]], align 4
5138; SINK-AFTER-NEXT:    br label [[PRED_STORE_CONTINUE11]]
5139; SINK-AFTER:       pred.store.continue11:
5140; SINK-AFTER-NEXT:    [[TMP34:%.*]] = extractelement <4 x i1> [[TMP6]], i32 3
5141; SINK-AFTER-NEXT:    br i1 [[TMP34]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13]]
5142; SINK-AFTER:       pred.store.if12:
5143; SINK-AFTER-NEXT:    [[TMP35:%.*]] = add i32 [[INDEX]], 3
5144; SINK-AFTER-NEXT:    [[TMP36:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[TMP35]]
5145; SINK-AFTER-NEXT:    store i32 [[TMP5]], i32* [[TMP36]], align 4
5146; SINK-AFTER-NEXT:    br label [[PRED_STORE_CONTINUE13]]
5147; SINK-AFTER:       pred.store.continue13:
5148; SINK-AFTER-NEXT:    [[TMP37:%.*]] = select <4 x i1> [[TMP6]], <4 x i32> [[TMP24]], <4 x i32> [[VEC_PHI]]
5149; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add i32 [[INDEX]], 4
5150; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i32> [[VEC_IND]], <i32 4, i32 4, i32 4, i32 4>
5151; SINK-AFTER-NEXT:    [[TMP38:%.*]] = icmp eq i32 [[INDEX_NEXT]], [[N_VEC]]
5152; SINK-AFTER-NEXT:    br i1 [[TMP38]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !prof [[PROF24]], !llvm.loop [[LOOP28:![0-9]+]]
5153; SINK-AFTER:       middle.block:
5154; SINK-AFTER-NEXT:    [[TMP39:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP37]])
5155; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP22]], i32 3
5156; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP22]], i32 2
5157; SINK-AFTER-NEXT:    br i1 true, label [[BB1:%.*]], label [[SCALAR_PH]]
5158; SINK-AFTER:       scalar.ph:
5159; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[BB:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5160; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i32 [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[Y]], [[BB]] ]
5161; SINK-AFTER-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi i32 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[BB]] ]
5162; SINK-AFTER-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ 0, [[BB]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]
5163; SINK-AFTER-NEXT:    br label [[BB2:%.*]]
5164; SINK-AFTER:       bb1:
5165; SINK-AFTER-NEXT:    [[VAR:%.*]] = phi i32 [ [[VAR6:%.*]], [[BB2]] ], [ [[TMP39]], [[MIDDLE_BLOCK]] ]
5166; SINK-AFTER-NEXT:    ret i32 [[VAR]]
5167; SINK-AFTER:       bb2:
5168; SINK-AFTER-NEXT:    [[VAR3:%.*]] = phi i32 [ [[VAR8:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
5169; SINK-AFTER-NEXT:    [[IV:%.*]] = phi i32 [ [[IV_NEXT:%.*]], [[BB2]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
5170; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[VAR7:%.*]], [[BB2]] ], [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ]
5171; SINK-AFTER-NEXT:    [[VAR5:%.*]] = phi i32 [ [[VAR6]], [[BB2]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
5172; SINK-AFTER-NEXT:    [[G:%.*]] = getelementptr inbounds i32, i32* [[X]], i32 [[IV]]
5173; SINK-AFTER-NEXT:    [[VAR6]] = add i32 [[VAR5]], [[SCALAR_RECUR]]
5174; SINK-AFTER-NEXT:    [[VAR7]] = udiv i32 219220132, [[VAR3]]
5175; SINK-AFTER-NEXT:    store i32 [[VAR3]], i32* [[G]], align 4
5176; SINK-AFTER-NEXT:    [[VAR8]] = add nsw i32 [[VAR3]], -1
5177; SINK-AFTER-NEXT:    [[IV_NEXT]] = add nsw i32 [[IV]], 1
5178; SINK-AFTER-NEXT:    [[VAR9:%.*]] = icmp slt i32 [[VAR3]], 2
5179; SINK-AFTER-NEXT:    br i1 [[VAR9]], label [[BB1]], label [[BB2]], !prof [[PROF26]], !llvm.loop [[LOOP29:![0-9]+]]
5180;
5181bb:
5182  br label %bb2
5183
5184  bb1:                                              ; preds = %bb2
5185  %var = phi i32 [ %var6, %bb2 ]
5186  ret i32 %var
5187
5188  bb2:                                              ; preds = %bb2, %bb
5189  %var3 = phi i32 [ %var8, %bb2 ], [ %y, %bb ]
5190  %iv = phi i32 [ %iv.next, %bb2 ], [ 0, %bb ]
5191  %var4 = phi i32 [ %var7, %bb2 ], [ 0, %bb ]
5192  %var5 = phi i32 [ %var6, %bb2 ], [ 0, %bb ]
5193  %g = getelementptr inbounds i32, i32* %x, i32 %iv
5194  %var6 = add i32 %var5, %var4
5195  %var7 = udiv i32 219220132, %var3
5196  store i32 %var3, i32* %g, align 4
5197  %var8 = add nsw i32 %var3, -1
5198  %iv.next = add nsw i32 %iv, 1
5199  %var9 = icmp slt i32 %var3, 2
5200  br i1 %var9, label %bb1, label %bb2, !prof !2
5201}
5202
5203; %vec.dead will be marked as dead instruction in the vector loop and no recipe
5204; will be created for it. Make sure a valid sink target is used.
5205define void @sink_after_dead_inst(i32* %A.ptr) {
5206; CHECK-LABEL: @sink_after_dead_inst(
5207; CHECK-NEXT:  entry:
5208; CHECK-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5209; CHECK:       vector.ph:
5210; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
5211; CHECK:       vector.body:
5212; CHECK-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5213; CHECK-NEXT:    [[OFFSET_IDX:%.*]] = zext i32 [[INDEX]] to i64
5214; CHECK-NEXT:    [[SEXT:%.*]] = shl i64 [[OFFSET_IDX]], 48
5215; CHECK-NEXT:    [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 48
5216; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i64 [[TMP0]]
5217; CHECK-NEXT:    [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
5218; CHECK-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP2]], align 4
5219; CHECK-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
5220; CHECK-NEXT:    [[TMP3:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
5221; CHECK-NEXT:    br i1 [[TMP3]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
5222; CHECK:       middle.block:
5223; CHECK-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
5224; CHECK:       scalar.ph:
5225; CHECK-NEXT:    br label [[LOOP:%.*]]
5226; CHECK:       loop:
5227; CHECK-NEXT:    br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
5228; CHECK:       for.end:
5229; CHECK-NEXT:    ret void
5230;
5231; UNROLL-LABEL: @sink_after_dead_inst(
5232; UNROLL-NEXT:  entry:
5233; UNROLL-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5234; UNROLL:       vector.ph:
5235; UNROLL-NEXT:    br label [[VECTOR_BODY:%.*]]
5236; UNROLL:       vector.body:
5237; UNROLL-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5238; UNROLL-NEXT:    [[OFFSET_IDX:%.*]] = zext i32 [[INDEX]] to i64
5239; UNROLL-NEXT:    [[SEXT:%.*]] = shl i64 [[OFFSET_IDX]], 48
5240; UNROLL-NEXT:    [[TMP0:%.*]] = ashr exact i64 [[SEXT]], 48
5241; UNROLL-NEXT:    [[TMP1:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i64 [[TMP0]]
5242; UNROLL-NEXT:    [[TMP2:%.*]] = bitcast i32* [[TMP1]] to <4 x i32>*
5243; UNROLL-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP2]], align 4
5244; UNROLL-NEXT:    [[TMP3:%.*]] = getelementptr i32, i32* [[TMP1]], i64 4
5245; UNROLL-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
5246; UNROLL-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP4]], align 4
5247; UNROLL-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
5248; UNROLL-NEXT:    [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
5249; UNROLL-NEXT:    br i1 [[TMP5]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
5250; UNROLL:       middle.block:
5251; UNROLL-NEXT:    br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
5252; UNROLL:       scalar.ph:
5253; UNROLL-NEXT:    br label [[LOOP:%.*]]
5254; UNROLL:       loop:
5255; UNROLL-NEXT:    br i1 undef, label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
5256; UNROLL:       for.end:
5257; UNROLL-NEXT:    ret void
5258;
5259; UNROLL-NO-IC-LABEL: @sink_after_dead_inst(
5260; UNROLL-NO-IC-NEXT:  entry:
5261; UNROLL-NO-IC-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5262; UNROLL-NO-IC:       vector.ph:
5263; UNROLL-NO-IC-NEXT:    br label [[VECTOR_BODY:%.*]]
5264; UNROLL-NO-IC:       vector.body:
5265; UNROLL-NO-IC-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5266; UNROLL-NO-IC-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5267; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP7:%.*]], [[VECTOR_BODY]] ]
5268; UNROLL-NO-IC-NEXT:    [[STEP_ADD:%.*]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
5269; UNROLL-NO-IC-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
5270; UNROLL-NO-IC-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
5271; UNROLL-NO-IC-NEXT:    [[TMP1:%.*]] = add i16 [[OFFSET_IDX]], 4
5272; UNROLL-NO-IC-NEXT:    [[TMP2:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
5273; UNROLL-NO-IC-NEXT:    [[TMP3:%.*]] = add <4 x i16> [[STEP_ADD]], <i16 1, i16 1, i16 1, i16 1>
5274; UNROLL-NO-IC-NEXT:    [[TMP4:%.*]] = or <4 x i16> [[TMP2]], [[TMP2]]
5275; UNROLL-NO-IC-NEXT:    [[TMP5:%.*]] = or <4 x i16> [[TMP3]], [[TMP3]]
5276; UNROLL-NO-IC-NEXT:    [[TMP6:%.*]] = zext <4 x i16> [[TMP4]] to <4 x i32>
5277; UNROLL-NO-IC-NEXT:    [[TMP7]] = zext <4 x i16> [[TMP5]] to <4 x i32>
5278; UNROLL-NO-IC-NEXT:    [[TMP8:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP6]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
5279; UNROLL-NO-IC-NEXT:    [[TMP9:%.*]] = shufflevector <4 x i32> [[TMP6]], <4 x i32> [[TMP7]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
5280; UNROLL-NO-IC-NEXT:    [[TMP10:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[TMP0]]
5281; UNROLL-NO-IC-NEXT:    [[TMP11:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[TMP1]]
5282; UNROLL-NO-IC-NEXT:    [[TMP12:%.*]] = getelementptr i32, i32* [[TMP10]], i32 0
5283; UNROLL-NO-IC-NEXT:    [[TMP13:%.*]] = bitcast i32* [[TMP12]] to <4 x i32>*
5284; UNROLL-NO-IC-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP13]], align 4
5285; UNROLL-NO-IC-NEXT:    [[TMP14:%.*]] = getelementptr i32, i32* [[TMP10]], i32 4
5286; UNROLL-NO-IC-NEXT:    [[TMP15:%.*]] = bitcast i32* [[TMP14]] to <4 x i32>*
5287; UNROLL-NO-IC-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP15]], align 4
5288; UNROLL-NO-IC-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 8
5289; UNROLL-NO-IC-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[STEP_ADD]], <i16 4, i16 4, i16 4, i16 4>
5290; UNROLL-NO-IC-NEXT:    [[TMP16:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
5291; UNROLL-NO-IC-NEXT:    br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
5292; UNROLL-NO-IC:       middle.block:
5293; UNROLL-NO-IC-NEXT:    [[CMP_N:%.*]] = icmp eq i32 16, 16
5294; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP7]], i32 3
5295; UNROLL-NO-IC-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP7]], i32 2
5296; UNROLL-NO-IC-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5297; UNROLL-NO-IC:       scalar.ph:
5298; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5299; UNROLL-NO-IC-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5300; UNROLL-NO-IC-NEXT:    br label [[LOOP:%.*]]
5301; UNROLL-NO-IC:       loop:
5302; UNROLL-NO-IC-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
5303; UNROLL-NO-IC-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ]
5304; UNROLL-NO-IC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR]], 15
5305; UNROLL-NO-IC-NEXT:    [[C:%.*]] = icmp eq i1 [[CMP]], true
5306; UNROLL-NO-IC-NEXT:    [[VEC_DEAD:%.*]] = and i1 [[C]], true
5307; UNROLL-NO-IC-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
5308; UNROLL-NO-IC-NEXT:    [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]]
5309; UNROLL-NO-IC-NEXT:    [[B3:%.*]] = and i1 [[CMP]], [[C]]
5310; UNROLL-NO-IC-NEXT:    [[FOR_PREV]] = zext i16 [[B1]] to i32
5311; UNROLL-NO-IC-NEXT:    [[EXT:%.*]] = zext i1 [[B3]] to i32
5312; UNROLL-NO-IC-NEXT:    [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]]
5313; UNROLL-NO-IC-NEXT:    store i32 0, i32* [[A_GEP]], align 4
5314; UNROLL-NO-IC-NEXT:    br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
5315; UNROLL-NO-IC:       for.end:
5316; UNROLL-NO-IC-NEXT:    ret void
5317;
5318; UNROLL-NO-VF-LABEL: @sink_after_dead_inst(
5319; UNROLL-NO-VF-NEXT:  entry:
5320; UNROLL-NO-VF-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5321; UNROLL-NO-VF:       vector.ph:
5322; UNROLL-NO-VF-NEXT:    br label [[VECTOR_BODY:%.*]]
5323; UNROLL-NO-VF:       vector.body:
5324; UNROLL-NO-VF-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5325; UNROLL-NO-VF-NEXT:    [[VECTOR_RECUR:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
5326; UNROLL-NO-VF-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
5327; UNROLL-NO-VF-NEXT:    [[INDUCTION:%.*]] = add i16 [[OFFSET_IDX]], 0
5328; UNROLL-NO-VF-NEXT:    [[INDUCTION1:%.*]] = add i16 [[OFFSET_IDX]], 1
5329; UNROLL-NO-VF-NEXT:    [[TMP0:%.*]] = add i16 [[INDUCTION]], 1
5330; UNROLL-NO-VF-NEXT:    [[TMP1:%.*]] = add i16 [[INDUCTION1]], 1
5331; UNROLL-NO-VF-NEXT:    [[TMP2:%.*]] = or i16 [[TMP0]], [[TMP0]]
5332; UNROLL-NO-VF-NEXT:    [[TMP3:%.*]] = or i16 [[TMP1]], [[TMP1]]
5333; UNROLL-NO-VF-NEXT:    [[TMP4:%.*]] = zext i16 [[TMP2]] to i32
5334; UNROLL-NO-VF-NEXT:    [[TMP5]] = zext i16 [[TMP3]] to i32
5335; UNROLL-NO-VF-NEXT:    [[TMP6:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[INDUCTION]]
5336; UNROLL-NO-VF-NEXT:    [[TMP7:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[INDUCTION1]]
5337; UNROLL-NO-VF-NEXT:    store i32 0, i32* [[TMP6]], align 4
5338; UNROLL-NO-VF-NEXT:    store i32 0, i32* [[TMP7]], align 4
5339; UNROLL-NO-VF-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 2
5340; UNROLL-NO-VF-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
5341; UNROLL-NO-VF-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP29:![0-9]+]]
5342; UNROLL-NO-VF:       middle.block:
5343; UNROLL-NO-VF-NEXT:    [[CMP_N:%.*]] = icmp eq i32 16, 16
5344; UNROLL-NO-VF-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5345; UNROLL-NO-VF:       scalar.ph:
5346; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[TMP5]], [[MIDDLE_BLOCK]] ]
5347; UNROLL-NO-VF-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5348; UNROLL-NO-VF-NEXT:    br label [[LOOP:%.*]]
5349; UNROLL-NO-VF:       loop:
5350; UNROLL-NO-VF-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
5351; UNROLL-NO-VF-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ]
5352; UNROLL-NO-VF-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR]], 15
5353; UNROLL-NO-VF-NEXT:    [[C:%.*]] = icmp eq i1 [[CMP]], true
5354; UNROLL-NO-VF-NEXT:    [[VEC_DEAD:%.*]] = and i1 [[C]], true
5355; UNROLL-NO-VF-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
5356; UNROLL-NO-VF-NEXT:    [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]]
5357; UNROLL-NO-VF-NEXT:    [[B3:%.*]] = and i1 [[CMP]], [[C]]
5358; UNROLL-NO-VF-NEXT:    [[FOR_PREV]] = zext i16 [[B1]] to i32
5359; UNROLL-NO-VF-NEXT:    [[EXT:%.*]] = zext i1 [[B3]] to i32
5360; UNROLL-NO-VF-NEXT:    [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]]
5361; UNROLL-NO-VF-NEXT:    store i32 0, i32* [[A_GEP]], align 4
5362; UNROLL-NO-VF-NEXT:    br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP30:![0-9]+]]
5363; UNROLL-NO-VF:       for.end:
5364; UNROLL-NO-VF-NEXT:    ret void
5365;
5366; SINK-AFTER-LABEL: @sink_after_dead_inst(
5367; SINK-AFTER-NEXT:  entry:
5368; SINK-AFTER-NEXT:    br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
5369; SINK-AFTER:       vector.ph:
5370; SINK-AFTER-NEXT:    br label [[VECTOR_BODY:%.*]]
5371; SINK-AFTER:       vector.body:
5372; SINK-AFTER-NEXT:    [[INDEX:%.*]] = phi i32 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
5373; SINK-AFTER-NEXT:    [[VEC_IND:%.*]] = phi <4 x i16> [ <i16 0, i16 1, i16 2, i16 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
5374; SINK-AFTER-NEXT:    [[VECTOR_RECUR:%.*]] = phi <4 x i32> [ <i32 poison, i32 poison, i32 poison, i32 0>, [[VECTOR_PH]] ], [ [[TMP3:%.*]], [[VECTOR_BODY]] ]
5375; SINK-AFTER-NEXT:    [[OFFSET_IDX:%.*]] = trunc i32 [[INDEX]] to i16
5376; SINK-AFTER-NEXT:    [[TMP0:%.*]] = add i16 [[OFFSET_IDX]], 0
5377; SINK-AFTER-NEXT:    [[TMP1:%.*]] = add <4 x i16> [[VEC_IND]], <i16 1, i16 1, i16 1, i16 1>
5378; SINK-AFTER-NEXT:    [[TMP2:%.*]] = or <4 x i16> [[TMP1]], [[TMP1]]
5379; SINK-AFTER-NEXT:    [[TMP3]] = zext <4 x i16> [[TMP2]] to <4 x i32>
5380; SINK-AFTER-NEXT:    [[TMP4:%.*]] = shufflevector <4 x i32> [[VECTOR_RECUR]], <4 x i32> [[TMP3]], <4 x i32> <i32 3, i32 4, i32 5, i32 6>
5381; SINK-AFTER-NEXT:    [[TMP5:%.*]] = getelementptr i32, i32* [[A_PTR:%.*]], i16 [[TMP0]]
5382; SINK-AFTER-NEXT:    [[TMP6:%.*]] = getelementptr i32, i32* [[TMP5]], i32 0
5383; SINK-AFTER-NEXT:    [[TMP7:%.*]] = bitcast i32* [[TMP6]] to <4 x i32>*
5384; SINK-AFTER-NEXT:    store <4 x i32> zeroinitializer, <4 x i32>* [[TMP7]], align 4
5385; SINK-AFTER-NEXT:    [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4
5386; SINK-AFTER-NEXT:    [[VEC_IND_NEXT]] = add <4 x i16> [[VEC_IND]], <i16 4, i16 4, i16 4, i16 4>
5387; SINK-AFTER-NEXT:    [[TMP8:%.*]] = icmp eq i32 [[INDEX_NEXT]], 16
5388; SINK-AFTER-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP30:![0-9]+]]
5389; SINK-AFTER:       middle.block:
5390; SINK-AFTER-NEXT:    [[CMP_N:%.*]] = icmp eq i32 16, 16
5391; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT:%.*]] = extractelement <4 x i32> [[TMP3]], i32 3
5392; SINK-AFTER-NEXT:    [[VECTOR_RECUR_EXTRACT_FOR_PHI:%.*]] = extractelement <4 x i32> [[TMP3]], i32 2
5393; SINK-AFTER-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
5394; SINK-AFTER:       scalar.ph:
5395; SINK-AFTER-NEXT:    [[SCALAR_RECUR_INIT:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[VECTOR_RECUR_EXTRACT]], [[MIDDLE_BLOCK]] ]
5396; SINK-AFTER-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i16 [ 16, [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY]] ]
5397; SINK-AFTER-NEXT:    br label [[LOOP:%.*]]
5398; SINK-AFTER:       loop:
5399; SINK-AFTER-NEXT:    [[IV:%.*]] = phi i16 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[LOOP]] ]
5400; SINK-AFTER-NEXT:    [[SCALAR_RECUR:%.*]] = phi i32 [ [[SCALAR_RECUR_INIT]], [[SCALAR_PH]] ], [ [[FOR_PREV:%.*]], [[LOOP]] ]
5401; SINK-AFTER-NEXT:    [[CMP:%.*]] = icmp eq i32 [[SCALAR_RECUR]], 15
5402; SINK-AFTER-NEXT:    [[C:%.*]] = icmp eq i1 [[CMP]], true
5403; SINK-AFTER-NEXT:    [[VEC_DEAD:%.*]] = and i1 [[C]], true
5404; SINK-AFTER-NEXT:    [[IV_NEXT]] = add i16 [[IV]], 1
5405; SINK-AFTER-NEXT:    [[B1:%.*]] = or i16 [[IV_NEXT]], [[IV_NEXT]]
5406; SINK-AFTER-NEXT:    [[B3:%.*]] = and i1 [[CMP]], [[C]]
5407; SINK-AFTER-NEXT:    [[FOR_PREV]] = zext i16 [[B1]] to i32
5408; SINK-AFTER-NEXT:    [[EXT:%.*]] = zext i1 [[B3]] to i32
5409; SINK-AFTER-NEXT:    [[A_GEP:%.*]] = getelementptr i32, i32* [[A_PTR]], i16 [[IV]]
5410; SINK-AFTER-NEXT:    store i32 0, i32* [[A_GEP]], align 4
5411; SINK-AFTER-NEXT:    br i1 [[VEC_DEAD]], label [[FOR_END]], label [[LOOP]], !llvm.loop [[LOOP31:![0-9]+]]
5412; SINK-AFTER:       for.end:
5413; SINK-AFTER-NEXT:    ret void
5414;
5415entry:
5416  br label %loop
5417
5418loop:
5419  %iv = phi i16 [ 0, %entry ], [ %iv.next, %loop ]
5420  %for = phi i32 [ 0, %entry ], [ %for.prev, %loop ]
5421  %cmp = icmp eq i32 %for, 15
5422  %C = icmp eq i1 %cmp, true
5423  %vec.dead = and i1 %C, 1
5424  %iv.next = add i16 %iv, 1
5425  %B1 = or i16 %iv.next, %iv.next
5426  %B3 = and i1 %cmp, %C
5427  %for.prev = zext i16 %B1 to i32
5428
5429  %ext = zext i1 %B3 to i32
5430  %A.gep = getelementptr i32, i32* %A.ptr, i16 %iv
5431  store i32 0, i32* %A.gep
5432  br i1 %vec.dead, label %for.end, label %loop
5433
5434for.end:
5435  ret void
5436}
5437
5438!2 = !{!"branch_weights", i32 1, i32 1}
5439