1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s
3; RUN: opt < %s  -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s
4; RUN: opt < %s  -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s
5; RUN: opt < %s  -loop-vectorize -force-vector-interleave=1 -force-vector-width=2 -dce -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -instcombine -simplifycfg -simplifycfg-require-and-preserve-domtree=1 -keep-loops=false -S | FileCheck --check-prefix VEC2_INTERL1_PRED_STORE %s
6
7@fp_inc = common global float 0.000000e+00, align 4
8
9;void fp_iv_loop1(float init, float * __restrict__ A, int N) {
10;  float x = init;
11;  for (int i=0; i < N; ++i) {
12;    A[i] = x;
13;    x -= fp_inc;
14;  }
15;}
16
17
18
19
20define void @fp_iv_loop1_fast_FMF(float %init, float* noalias nocapture %A, i32 %N) {
21; VEC4_INTERL1-LABEL: @fp_iv_loop1_fast_FMF(
22; VEC4_INTERL1-NEXT:  entry:
23; VEC4_INTERL1-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
24; VEC4_INTERL1-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
25; VEC4_INTERL1:       for.body.lr.ph:
26; VEC4_INTERL1-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
27; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
28; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
29; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
30; VEC4_INTERL1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
31; VEC4_INTERL1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
32; VEC4_INTERL1:       vector.ph:
33; VEC4_INTERL1-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
34; VEC4_INTERL1-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
35; VEC4_INTERL1-NEXT:    [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]]
36; VEC4_INTERL1-NEXT:    [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
37; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
38; VEC4_INTERL1-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
39; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
40; VEC4_INTERL1-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
41; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
42; VEC4_INTERL1-NEXT:    [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP4]]
43; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = fmul fast float [[FPINC]], 4.000000e+00
44; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
45; VEC4_INTERL1-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer
46; VEC4_INTERL1-NEXT:    br label [[VECTOR_BODY:%.*]]
47; VEC4_INTERL1:       vector.body:
48; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
49; VEC4_INTERL1-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
50; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
51; VEC4_INTERL1-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
52; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4
53; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
54; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
55; VEC4_INTERL1-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
56; VEC4_INTERL1-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
57; VEC4_INTERL1:       middle.block:
58; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
59; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
60; VEC4_INTERL1:       scalar.ph:
61; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
62; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
63; VEC4_INTERL1-NEXT:    br label [[FOR_BODY:%.*]]
64; VEC4_INTERL1:       for.body:
65; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
66; VEC4_INTERL1-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
67; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
68; VEC4_INTERL1-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
69; VEC4_INTERL1-NEXT:    [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
70; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
71; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
72; VEC4_INTERL1-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
73; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
74; VEC4_INTERL1:       for.end.loopexit:
75; VEC4_INTERL1-NEXT:    br label [[FOR_END]]
76; VEC4_INTERL1:       for.end:
77; VEC4_INTERL1-NEXT:    ret void
78;
79; VEC4_INTERL2-LABEL: @fp_iv_loop1_fast_FMF(
80; VEC4_INTERL2-NEXT:  entry:
81; VEC4_INTERL2-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
82; VEC4_INTERL2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
83; VEC4_INTERL2:       for.body.lr.ph:
84; VEC4_INTERL2-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
85; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
86; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
87; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
88; VEC4_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
89; VEC4_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
90; VEC4_INTERL2:       vector.ph:
91; VEC4_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
92; VEC4_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
93; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]]
94; VEC4_INTERL2-NEXT:    [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
95; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
96; VEC4_INTERL2-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
97; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
98; VEC4_INTERL2-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
99; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = fmul fast <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
100; VEC4_INTERL2-NEXT:    [[INDUCTION:%.*]] = fsub fast <4 x float> [[DOTSPLAT]], [[TMP4]]
101; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = fmul fast float [[FPINC]], 4.000000e+00
102; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
103; VEC4_INTERL2-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer
104; VEC4_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
105; VEC4_INTERL2:       vector.body:
106; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
107; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
108; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fsub fast <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
109; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
110; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
111; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4
112; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 4
113; VEC4_INTERL2-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
114; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP9]], align 4
115; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
116; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fsub fast <4 x float> [[STEP_ADD]], [[DOTSPLAT5]]
117; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
118; VEC4_INTERL2-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
119; VEC4_INTERL2:       middle.block:
120; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
121; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
122; VEC4_INTERL2:       scalar.ph:
123; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
124; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
125; VEC4_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
126; VEC4_INTERL2:       for.body:
127; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
128; VEC4_INTERL2-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
129; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
130; VEC4_INTERL2-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
131; VEC4_INTERL2-NEXT:    [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
132; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
133; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
134; VEC4_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
135; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
136; VEC4_INTERL2:       for.end.loopexit:
137; VEC4_INTERL2-NEXT:    br label [[FOR_END]]
138; VEC4_INTERL2:       for.end:
139; VEC4_INTERL2-NEXT:    ret void
140;
141; VEC1_INTERL2-LABEL: @fp_iv_loop1_fast_FMF(
142; VEC1_INTERL2-NEXT:  entry:
143; VEC1_INTERL2-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
144; VEC1_INTERL2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
145; VEC1_INTERL2:       for.body.lr.ph:
146; VEC1_INTERL2-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
147; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
148; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
149; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
150; VEC1_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
151; VEC1_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
152; VEC1_INTERL2:       vector.ph:
153; VEC1_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
154; VEC1_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
155; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]]
156; VEC1_INTERL2-NEXT:    [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
157; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
158; VEC1_INTERL2:       vector.body:
159; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
160; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
161; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float
162; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fmul fast float [[FPINC]], [[TMP4]]
163; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fsub fast float [[INIT]], [[TMP5]]
164; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fsub fast float [[OFFSET_IDX]], [[FPINC]]
165; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
166; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
167; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], float* [[TMP7]], align 4
168; VEC1_INTERL2-NEXT:    store float [[TMP6]], float* [[TMP8]], align 4
169; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
170; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
171; VEC1_INTERL2-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
172; VEC1_INTERL2:       middle.block:
173; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
174; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
175; VEC1_INTERL2:       scalar.ph:
176; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
177; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
178; VEC1_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
179; VEC1_INTERL2:       for.body:
180; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
181; VEC1_INTERL2-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
182; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
183; VEC1_INTERL2-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
184; VEC1_INTERL2-NEXT:    [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
185; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
186; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
187; VEC1_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
188; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
189; VEC1_INTERL2:       for.end.loopexit:
190; VEC1_INTERL2-NEXT:    br label [[FOR_END]]
191; VEC1_INTERL2:       for.end:
192; VEC1_INTERL2-NEXT:    ret void
193;
194; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop1_fast_FMF(
195; VEC2_INTERL1_PRED_STORE-NEXT:  entry:
196; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
197; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
198; VEC2_INTERL1_PRED_STORE:       for.body.lr.ph:
199; VEC2_INTERL1_PRED_STORE-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
200; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
201; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
202; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
203; VEC2_INTERL1_PRED_STORE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
204; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
205; VEC2_INTERL1_PRED_STORE:       vector.ph:
206; VEC2_INTERL1_PRED_STORE-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
207; VEC2_INTERL1_PRED_STORE-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
208; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = fmul fast float [[FPINC]], [[CAST_CRD]]
209; VEC2_INTERL1_PRED_STORE-NEXT:    [[IND_END:%.*]] = fsub fast float [[INIT:%.*]], [[TMP3]]
210; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
211; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
212; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0
213; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
214; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = fmul fast <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00>
215; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDUCTION:%.*]] = fsub fast <2 x float> [[DOTSPLAT]], [[TMP4]]
216; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = fmul fast float [[FPINC]], 2.000000e+00
217; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0
218; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer
219; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[VECTOR_BODY:%.*]]
220; VEC2_INTERL1_PRED_STORE:       vector.body:
221; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
222; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
223; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
224; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>*
225; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], <2 x float>* [[TMP7]], align 4
226; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
227; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fsub fast <2 x float> [[VEC_IND]], [[DOTSPLAT5]]
228; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
229; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
230; VEC2_INTERL1_PRED_STORE:       middle.block:
231; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
232; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]]
233; VEC2_INTERL1_PRED_STORE:       for.body:
234; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
235; VEC2_INTERL1_PRED_STORE-NEXT:    [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
236; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
237; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
238; VEC2_INTERL1_PRED_STORE-NEXT:    [[ADD]] = fsub fast float [[X_05]], [[FPINC]]
239; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
240; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
241; VEC2_INTERL1_PRED_STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
242; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
243; VEC2_INTERL1_PRED_STORE:       for.end:
244; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
245;
246entry:
247  %cmp4 = icmp sgt i32 %N, 0
248  br i1 %cmp4, label %for.body.lr.ph, label %for.end
249
250for.body.lr.ph:                                   ; preds = %entry
251  %fpinc = load float, float* @fp_inc, align 4
252  br label %for.body
253
254for.body:                                         ; preds = %for.body, %for.body.lr.ph
255  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
256  %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
257  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
258  store float %x.05, float* %arrayidx, align 4
259  %add = fsub fast float %x.05, %fpinc
260  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
261  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
262  %exitcond = icmp eq i32 %lftr.wideiv, %N
263  br i1 %exitcond, label %for.end.loopexit, label %for.body
264
265for.end.loopexit:                                 ; preds = %for.body
266  br label %for.end
267
268for.end:                                          ; preds = %for.end.loopexit, %entry
269  ret void
270}
271
272; We do not need the full 'fast' FMF to vectorize the loop, but the code can't become
273; 'fast' spontaneously - FMF should propagate from the original IR.
274
275define void @fp_iv_loop1_reassoc_FMF(float %init, float* noalias nocapture %A, i32 %N) {
276;
277;
278;
279;
280; VEC4_INTERL1-LABEL: @fp_iv_loop1_reassoc_FMF(
281; VEC4_INTERL1-NEXT:  entry:
282; VEC4_INTERL1-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
283; VEC4_INTERL1-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
284; VEC4_INTERL1:       for.body.lr.ph:
285; VEC4_INTERL1-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
286; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
287; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
288; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
289; VEC4_INTERL1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
290; VEC4_INTERL1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
291; VEC4_INTERL1:       vector.ph:
292; VEC4_INTERL1-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
293; VEC4_INTERL1-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
294; VEC4_INTERL1-NEXT:    [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]]
295; VEC4_INTERL1-NEXT:    [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]]
296; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
297; VEC4_INTERL1-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
298; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
299; VEC4_INTERL1-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
300; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
301; VEC4_INTERL1-NEXT:    [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP4]]
302; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00
303; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
304; VEC4_INTERL1-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer
305; VEC4_INTERL1-NEXT:    br label [[VECTOR_BODY:%.*]]
306; VEC4_INTERL1:       vector.body:
307; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
308; VEC4_INTERL1-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
309; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
310; VEC4_INTERL1-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
311; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4
312; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
313; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
314; VEC4_INTERL1-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
315; VEC4_INTERL1-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
316; VEC4_INTERL1:       middle.block:
317; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
318; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
319; VEC4_INTERL1:       scalar.ph:
320; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
321; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
322; VEC4_INTERL1-NEXT:    br label [[FOR_BODY:%.*]]
323; VEC4_INTERL1:       for.body:
324; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
325; VEC4_INTERL1-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
326; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
327; VEC4_INTERL1-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
328; VEC4_INTERL1-NEXT:    [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]]
329; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
330; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
331; VEC4_INTERL1-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
332; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
333; VEC4_INTERL1:       for.end.loopexit:
334; VEC4_INTERL1-NEXT:    br label [[FOR_END]]
335; VEC4_INTERL1:       for.end:
336; VEC4_INTERL1-NEXT:    ret void
337;
338; VEC4_INTERL2-LABEL: @fp_iv_loop1_reassoc_FMF(
339; VEC4_INTERL2-NEXT:  entry:
340; VEC4_INTERL2-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
341; VEC4_INTERL2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
342; VEC4_INTERL2:       for.body.lr.ph:
343; VEC4_INTERL2-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
344; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
345; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
346; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
347; VEC4_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
348; VEC4_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
349; VEC4_INTERL2:       vector.ph:
350; VEC4_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
351; VEC4_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
352; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]]
353; VEC4_INTERL2-NEXT:    [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]]
354; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
355; VEC4_INTERL2-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
356; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <4 x float> poison, float [[FPINC]], i64 0
357; VEC4_INTERL2-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT2]], <4 x float> poison, <4 x i32> zeroinitializer
358; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = fmul reassoc <4 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
359; VEC4_INTERL2-NEXT:    [[INDUCTION:%.*]] = fsub reassoc <4 x float> [[DOTSPLAT]], [[TMP4]]
360; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 4.000000e+00
361; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT4:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0
362; VEC4_INTERL2-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT4]], <4 x float> poison, <4 x i32> zeroinitializer
363; VEC4_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
364; VEC4_INTERL2:       vector.body:
365; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
366; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
367; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fsub reassoc <4 x float> [[VEC_IND]], [[DOTSPLAT5]]
368; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
369; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
370; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP7]], align 4
371; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[TMP6]], i64 4
372; VEC4_INTERL2-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
373; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP9]], align 4
374; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
375; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fsub reassoc <4 x float> [[STEP_ADD]], [[DOTSPLAT5]]
376; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
377; VEC4_INTERL2-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
378; VEC4_INTERL2:       middle.block:
379; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
380; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
381; VEC4_INTERL2:       scalar.ph:
382; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
383; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
384; VEC4_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
385; VEC4_INTERL2:       for.body:
386; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
387; VEC4_INTERL2-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
388; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
389; VEC4_INTERL2-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
390; VEC4_INTERL2-NEXT:    [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]]
391; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
392; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
393; VEC4_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
394; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
395; VEC4_INTERL2:       for.end.loopexit:
396; VEC4_INTERL2-NEXT:    br label [[FOR_END]]
397; VEC4_INTERL2:       for.end:
398; VEC4_INTERL2-NEXT:    ret void
399;
400; VEC1_INTERL2-LABEL: @fp_iv_loop1_reassoc_FMF(
401; VEC1_INTERL2-NEXT:  entry:
402; VEC1_INTERL2-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
403; VEC1_INTERL2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
404; VEC1_INTERL2:       for.body.lr.ph:
405; VEC1_INTERL2-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
406; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
407; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
408; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
409; VEC1_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
410; VEC1_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
411; VEC1_INTERL2:       vector.ph:
412; VEC1_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
413; VEC1_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
414; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]]
415; VEC1_INTERL2-NEXT:    [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]]
416; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
417; VEC1_INTERL2:       vector.body:
418; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
419; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
420; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float
421; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fmul reassoc float [[FPINC]], [[TMP4]]
422; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fsub reassoc float [[INIT]], [[TMP5]]
423; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fmul reassoc float [[FPINC]], 0.000000e+00
424; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[TMP6]]
425; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = fsub reassoc float [[OFFSET_IDX]], [[FPINC]]
426; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
427; VEC1_INTERL2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
428; VEC1_INTERL2-NEXT:    store float [[TMP7]], float* [[TMP9]], align 4
429; VEC1_INTERL2-NEXT:    store float [[TMP8]], float* [[TMP10]], align 4
430; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
431; VEC1_INTERL2-NEXT:    [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
432; VEC1_INTERL2-NEXT:    br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP3:![0-9]+]]
433; VEC1_INTERL2:       middle.block:
434; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
435; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
436; VEC1_INTERL2:       scalar.ph:
437; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
438; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
439; VEC1_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
440; VEC1_INTERL2:       for.body:
441; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
442; VEC1_INTERL2-NEXT:    [[X_05:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
443; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
444; VEC1_INTERL2-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
445; VEC1_INTERL2-NEXT:    [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]]
446; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
447; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
448; VEC1_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
449; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
450; VEC1_INTERL2:       for.end.loopexit:
451; VEC1_INTERL2-NEXT:    br label [[FOR_END]]
452; VEC1_INTERL2:       for.end:
453; VEC1_INTERL2-NEXT:    ret void
454;
455; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop1_reassoc_FMF(
456; VEC2_INTERL1_PRED_STORE-NEXT:  entry:
457; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
458; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
459; VEC2_INTERL1_PRED_STORE:       for.body.lr.ph:
460; VEC2_INTERL1_PRED_STORE-NEXT:    [[FPINC:%.*]] = load float, float* @fp_inc, align 4
461; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
462; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
463; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
464; VEC2_INTERL1_PRED_STORE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
465; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
466; VEC2_INTERL1_PRED_STORE:       vector.ph:
467; VEC2_INTERL1_PRED_STORE-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
468; VEC2_INTERL1_PRED_STORE-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
469; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = fmul reassoc float [[FPINC]], [[CAST_CRD]]
470; VEC2_INTERL1_PRED_STORE-NEXT:    [[IND_END:%.*]] = fsub reassoc float [[INIT:%.*]], [[TMP3]]
471; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
472; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
473; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLATINSERT2:%.*]] = insertelement <2 x float> poison, float [[FPINC]], i64 0
474; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLAT3:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT2]], <2 x float> poison, <2 x i32> zeroinitializer
475; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = fmul reassoc <2 x float> [[DOTSPLAT3]], <float 0.000000e+00, float 1.000000e+00>
476; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDUCTION:%.*]] = fsub reassoc <2 x float> [[DOTSPLAT]], [[TMP4]]
477; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = fmul reassoc float [[FPINC]], 2.000000e+00
478; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLATINSERT4:%.*]] = insertelement <2 x float> poison, float [[TMP5]], i64 0
479; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLAT5:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT4]], <2 x float> poison, <2 x i32> zeroinitializer
480; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[VECTOR_BODY:%.*]]
481; VEC2_INTERL1_PRED_STORE:       vector.body:
482; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
483; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
484; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
485; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <2 x float>*
486; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], <2 x float>* [[TMP7]], align 4
487; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
488; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fsub reassoc <2 x float> [[VEC_IND]], [[DOTSPLAT5]]
489; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
490; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
491; VEC2_INTERL1_PRED_STORE:       middle.block:
492; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
493; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]]
494; VEC2_INTERL1_PRED_STORE:       for.body:
495; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
496; VEC2_INTERL1_PRED_STORE-NEXT:    [[X_05:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
497; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
498; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_05]], float* [[ARRAYIDX]], align 4
499; VEC2_INTERL1_PRED_STORE-NEXT:    [[ADD]] = fsub reassoc float [[X_05]], [[FPINC]]
500; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
501; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
502; VEC2_INTERL1_PRED_STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
503; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
504; VEC2_INTERL1_PRED_STORE:       for.end:
505; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
506;
507entry:
508  %cmp4 = icmp sgt i32 %N, 0
509  br i1 %cmp4, label %for.body.lr.ph, label %for.end
510
511for.body.lr.ph:                                   ; preds = %entry
512  %fpinc = load float, float* @fp_inc, align 4
513  br label %for.body
514
515for.body:                                         ; preds = %for.body, %for.body.lr.ph
516  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
517  %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
518  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
519  store float %x.05, float* %arrayidx, align 4
520  %add = fsub reassoc float %x.05, %fpinc
521  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
522  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
523  %exitcond = icmp eq i32 %lftr.wideiv, %N
524  br i1 %exitcond, label %for.end.loopexit, label %for.body
525
526for.end.loopexit:                                 ; preds = %for.body
527  br label %for.end
528
529for.end:                                          ; preds = %for.end.loopexit, %entry
530  ret void
531}
532
533;void fp_iv_loop2(float init, float * __restrict__ A, int N) {
534;  float x = init;
535;  for (int i=0; i < N; ++i) {
536;    A[i] = x;
537;    x += 0.5;
538;  }
539;}
540
541
542define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
543; VEC4_INTERL1-LABEL: @fp_iv_loop2(
544; VEC4_INTERL1-NEXT:  entry:
545; VEC4_INTERL1-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
546; VEC4_INTERL1-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
547; VEC4_INTERL1:       for.body.preheader:
548; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
549; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
550; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
551; VEC4_INTERL1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
552; VEC4_INTERL1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
553; VEC4_INTERL1:       vector.ph:
554; VEC4_INTERL1-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
555; VEC4_INTERL1-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
556; VEC4_INTERL1-NEXT:    [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01
557; VEC4_INTERL1-NEXT:    [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]]
558; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
559; VEC4_INTERL1-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
560; VEC4_INTERL1-NEXT:    [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
561; VEC4_INTERL1-NEXT:    br label [[VECTOR_BODY:%.*]]
562; VEC4_INTERL1:       vector.body:
563; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
564; VEC4_INTERL1-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
565; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
566; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
567; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4
568; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
569; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
570; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
571; VEC4_INTERL1-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
572; VEC4_INTERL1:       middle.block:
573; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
574; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
575; VEC4_INTERL1:       scalar.ph:
576; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
577; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ]
578; VEC4_INTERL1-NEXT:    br label [[FOR_BODY:%.*]]
579; VEC4_INTERL1:       for.body:
580; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
581; VEC4_INTERL1-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
582; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
583; VEC4_INTERL1-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
584; VEC4_INTERL1-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
585; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
586; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
587; VEC4_INTERL1-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
588; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
589; VEC4_INTERL1:       for.end.loopexit:
590; VEC4_INTERL1-NEXT:    br label [[FOR_END]]
591; VEC4_INTERL1:       for.end:
592; VEC4_INTERL1-NEXT:    ret void
593;
594; VEC4_INTERL2-LABEL: @fp_iv_loop2(
595; VEC4_INTERL2-NEXT:  entry:
596; VEC4_INTERL2-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
597; VEC4_INTERL2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
598; VEC4_INTERL2:       for.body.preheader:
599; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
600; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
601; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
602; VEC4_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
603; VEC4_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
604; VEC4_INTERL2:       vector.ph:
605; VEC4_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
606; VEC4_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
607; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01
608; VEC4_INTERL2-NEXT:    [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]]
609; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
610; VEC4_INTERL2-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
611; VEC4_INTERL2-NEXT:    [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
612; VEC4_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
613; VEC4_INTERL2:       vector.body:
614; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
615; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
616; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
617; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
618; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
619; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4
620; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 4
621; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
622; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP7]], align 4
623; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
624; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
625; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
626; VEC4_INTERL2-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
627; VEC4_INTERL2:       middle.block:
628; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
629; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
630; VEC4_INTERL2:       scalar.ph:
631; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
632; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ]
633; VEC4_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
634; VEC4_INTERL2:       for.body:
635; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
636; VEC4_INTERL2-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
637; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
638; VEC4_INTERL2-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
639; VEC4_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
640; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
641; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
642; VEC4_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
643; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
644; VEC4_INTERL2:       for.end.loopexit:
645; VEC4_INTERL2-NEXT:    br label [[FOR_END]]
646; VEC4_INTERL2:       for.end:
647; VEC4_INTERL2-NEXT:    ret void
648;
649; VEC1_INTERL2-LABEL: @fp_iv_loop2(
650; VEC1_INTERL2-NEXT:  entry:
651; VEC1_INTERL2-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
652; VEC1_INTERL2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
653; VEC1_INTERL2:       for.body.preheader:
654; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
655; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
656; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
657; VEC1_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
658; VEC1_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
659; VEC1_INTERL2:       vector.ph:
660; VEC1_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
661; VEC1_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
662; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01
663; VEC1_INTERL2-NEXT:    [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]]
664; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
665; VEC1_INTERL2:       vector.body:
666; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
667; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
668; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float
669; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], 5.000000e-01
670; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fadd fast float [[TMP5]], [[INIT]]
671; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fadd fast float [[OFFSET_IDX]], 5.000000e-01
672; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
673; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
674; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], float* [[TMP7]], align 4
675; VEC1_INTERL2-NEXT:    store float [[TMP6]], float* [[TMP8]], align 4
676; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
677; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
678; VEC1_INTERL2-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
679; VEC1_INTERL2:       middle.block:
680; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
681; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
682; VEC1_INTERL2:       scalar.ph:
683; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
684; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ]
685; VEC1_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
686; VEC1_INTERL2:       for.body:
687; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
688; VEC1_INTERL2-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
689; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
690; VEC1_INTERL2-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
691; VEC1_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
692; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
693; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
694; VEC1_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
695; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
696; VEC1_INTERL2:       for.end.loopexit:
697; VEC1_INTERL2-NEXT:    br label [[FOR_END]]
698; VEC1_INTERL2:       for.end:
699; VEC1_INTERL2-NEXT:    ret void
700;
701; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop2(
702; VEC2_INTERL1_PRED_STORE-NEXT:  entry:
703; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
704; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
705; VEC2_INTERL1_PRED_STORE:       for.body.preheader:
706; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
707; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
708; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
709; VEC2_INTERL1_PRED_STORE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
710; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
711; VEC2_INTERL1_PRED_STORE:       vector.ph:
712; VEC2_INTERL1_PRED_STORE-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
713; VEC2_INTERL1_PRED_STORE-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
714; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01
715; VEC2_INTERL1_PRED_STORE-NEXT:    [[IND_END:%.*]] = fadd fast float [[TMP3]], [[INIT:%.*]]
716; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
717; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
718; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], <float 0.000000e+00, float 5.000000e-01>
719; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[VECTOR_BODY:%.*]]
720; VEC2_INTERL1_PRED_STORE:       vector.body:
721; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
722; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
723; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
724; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <2 x float>*
725; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], <2 x float>* [[TMP5]], align 4
726; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
727; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float 1.000000e+00, float 1.000000e+00>
728; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
729; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]]
730; VEC2_INTERL1_PRED_STORE:       middle.block:
731; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
732; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]]
733; VEC2_INTERL1_PRED_STORE:       for.body:
734; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
735; VEC2_INTERL1_PRED_STORE-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_PREHEADER]] ]
736; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
737; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
738; VEC2_INTERL1_PRED_STORE-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
739; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
740; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
741; VEC2_INTERL1_PRED_STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
742; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
743; VEC2_INTERL1_PRED_STORE:       for.end:
744; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
745;
746entry:
747  %cmp4 = icmp sgt i32 %N, 0
748  br i1 %cmp4, label %for.body.preheader, label %for.end
749
750for.body.preheader:                               ; preds = %entry
751  br label %for.body
752
753for.body:                                         ; preds = %for.body.preheader, %for.body
754  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
755  %x.06 = phi float [ %conv1, %for.body ], [ %init, %for.body.preheader ]
756  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
757  store float %x.06, float* %arrayidx, align 4
758  %conv1 = fadd fast float %x.06, 5.000000e-01
759  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
760  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
761  %exitcond = icmp eq i32 %lftr.wideiv, %N
762  br i1 %exitcond, label %for.end.loopexit, label %for.body
763
764for.end.loopexit:                                 ; preds = %for.body
765  br label %for.end
766
767for.end:                                          ; preds = %for.end.loopexit, %entry
768  ret void
769}
770
771;void fp_iv_loop3(float init, float * __restrict__ A, float * __restrict__ B, float * __restrict__ C, int N) {
772;  int i = 0;
773;  float x = init;
774;  float y = 0.1;
775;  for (; i < N; ++i) {
776;    A[i] = x;
777;    x += fp_inc;
778;    y -= 0.5;
779;    B[i] = x + y;
780;    C[i] = y;
781;  }
782;}
783
784
785define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalias nocapture %B, float* noalias nocapture %C, i32 %N) {
786; VEC4_INTERL1-LABEL: @fp_iv_loop3(
787; VEC4_INTERL1-NEXT:  entry:
788; VEC4_INTERL1-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
789; VEC4_INTERL1-NEXT:    br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
790; VEC4_INTERL1:       for.body.lr.ph:
791; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = load float, float* @fp_inc, align 4
792; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -1
793; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
794; VEC4_INTERL1-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
795; VEC4_INTERL1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 3
796; VEC4_INTERL1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
797; VEC4_INTERL1:       vector.ph:
798; VEC4_INTERL1-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934588
799; VEC4_INTERL1-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
800; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01
801; VEC4_INTERL1-NEXT:    [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000
802; VEC4_INTERL1-NEXT:    [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float
803; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]]
804; VEC4_INTERL1-NEXT:    [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]]
805; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
806; VEC4_INTERL1-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
807; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
808; VEC4_INTERL1-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT5]], <4 x float> poison, <4 x i32> zeroinitializer
809; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = fmul fast <4 x float> [[DOTSPLAT6]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
810; VEC4_INTERL1-NEXT:    [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP6]]
811; VEC4_INTERL1-NEXT:    [[TMP7:%.*]] = fmul fast float [[TMP0]], 4.000000e+00
812; VEC4_INTERL1-NEXT:    [[DOTSPLATINSERT7:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i64 0
813; VEC4_INTERL1-NEXT:    [[DOTSPLAT8:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT7]], <4 x float> poison, <4 x i32> zeroinitializer
814; VEC4_INTERL1-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
815; VEC4_INTERL1-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
816; VEC4_INTERL1-NEXT:    br label [[VECTOR_BODY:%.*]]
817; VEC4_INTERL1:       vector.body:
818; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
819; VEC4_INTERL1-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000, float 0xBFECCCCCC0000000, float 0xBFF6666660000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
820; VEC4_INTERL1-NEXT:    [[VEC_IND9:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ]
821; VEC4_INTERL1-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
822; VEC4_INTERL1-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
823; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND9]], <4 x float>* [[TMP9]], align 4
824; VEC4_INTERL1-NEXT:    [[TMP10:%.*]] = fadd fast <4 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
825; VEC4_INTERL1-NEXT:    [[TMP11:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
826; VEC4_INTERL1-NEXT:    [[TMP12:%.*]] = fadd fast <4 x float> [[TMP11]], [[TMP10]]
827; VEC4_INTERL1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
828; VEC4_INTERL1-NEXT:    [[TMP14:%.*]] = bitcast float* [[TMP13]] to <4 x float>*
829; VEC4_INTERL1-NEXT:    store <4 x float> [[TMP12]], <4 x float>* [[TMP14]], align 4
830; VEC4_INTERL1-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]]
831; VEC4_INTERL1-NEXT:    [[TMP16:%.*]] = bitcast float* [[TMP15]] to <4 x float>*
832; VEC4_INTERL1-NEXT:    store <4 x float> [[TMP11]], <4 x float>* [[TMP16]], align 4
833; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
834; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float -2.000000e+00, float -2.000000e+00, float -2.000000e+00, float -2.000000e+00>
835; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT10]] = fadd fast <4 x float> [[VEC_IND9]], [[DOTSPLAT8]]
836; VEC4_INTERL1-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
837; VEC4_INTERL1-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
838; VEC4_INTERL1:       middle.block:
839; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
840; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
841; VEC4_INTERL1:       scalar.ph:
842; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
843; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ]
844; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi float [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
845; VEC4_INTERL1-NEXT:    br label [[FOR_BODY:%.*]]
846; VEC4_INTERL1:       for.body:
847; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
848; VEC4_INTERL1-NEXT:    [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ]
849; VEC4_INTERL1-NEXT:    [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
850; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
851; VEC4_INTERL1-NEXT:    store float [[X_011]], float* [[ARRAYIDX]], align 4
852; VEC4_INTERL1-NEXT:    [[ADD]] = fadd fast float [[X_011]], [[TMP0]]
853; VEC4_INTERL1-NEXT:    [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01
854; VEC4_INTERL1-NEXT:    [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]]
855; VEC4_INTERL1-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
856; VEC4_INTERL1-NEXT:    store float [[ADD2]], float* [[ARRAYIDX4]], align 4
857; VEC4_INTERL1-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]]
858; VEC4_INTERL1-NEXT:    store float [[CONV1]], float* [[ARRAYIDX6]], align 4
859; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
860; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
861; VEC4_INTERL1-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
862; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
863; VEC4_INTERL1:       for.end.loopexit:
864; VEC4_INTERL1-NEXT:    br label [[FOR_END]]
865; VEC4_INTERL1:       for.end:
866; VEC4_INTERL1-NEXT:    ret void
867;
868; VEC4_INTERL2-LABEL: @fp_iv_loop3(
869; VEC4_INTERL2-NEXT:  entry:
870; VEC4_INTERL2-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
871; VEC4_INTERL2-NEXT:    br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
872; VEC4_INTERL2:       for.body.lr.ph:
873; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = load float, float* @fp_inc, align 4
874; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -1
875; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
876; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
877; VEC4_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP1]], 7
878; VEC4_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
879; VEC4_INTERL2:       vector.ph:
880; VEC4_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934584
881; VEC4_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
882; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01
883; VEC4_INTERL2-NEXT:    [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000
884; VEC4_INTERL2-NEXT:    [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float
885; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]]
886; VEC4_INTERL2-NEXT:    [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]]
887; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[INIT]], i64 0
888; VEC4_INTERL2-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
889; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT6:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
890; VEC4_INTERL2-NEXT:    [[DOTSPLAT7:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT6]], <4 x float> poison, <4 x i32> zeroinitializer
891; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = fmul fast <4 x float> [[DOTSPLAT7]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
892; VEC4_INTERL2-NEXT:    [[INDUCTION:%.*]] = fadd fast <4 x float> [[DOTSPLAT]], [[TMP6]]
893; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = fmul fast float [[TMP0]], 4.000000e+00
894; VEC4_INTERL2-NEXT:    [[DOTSPLATINSERT8:%.*]] = insertelement <4 x float> poison, float [[TMP7]], i64 0
895; VEC4_INTERL2-NEXT:    [[DOTSPLAT9:%.*]] = shufflevector <4 x float> [[DOTSPLATINSERT8]], <4 x float> poison, <4 x i32> zeroinitializer
896; VEC4_INTERL2-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
897; VEC4_INTERL2-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
898; VEC4_INTERL2-NEXT:    [[BROADCAST_SPLATINSERT14:%.*]] = insertelement <4 x float> poison, float [[TMP0]], i64 0
899; VEC4_INTERL2-NEXT:    [[BROADCAST_SPLAT15:%.*]] = shufflevector <4 x float> [[BROADCAST_SPLATINSERT14]], <4 x float> poison, <4 x i32> zeroinitializer
900; VEC4_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
901; VEC4_INTERL2:       vector.body:
902; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
903; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000, float 0xBFECCCCCC0000000, float 0xBFF6666660000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
904; VEC4_INTERL2-NEXT:    [[VEC_IND10:%.*]] = phi <4 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT13:%.*]], [[VECTOR_BODY]] ]
905; VEC4_INTERL2-NEXT:    [[STEP_ADD11:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[DOTSPLAT9]]
906; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
907; VEC4_INTERL2-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
908; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND10]], <4 x float>* [[TMP9]], align 4
909; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = getelementptr inbounds float, float* [[TMP8]], i64 4
910; VEC4_INTERL2-NEXT:    [[TMP11:%.*]] = bitcast float* [[TMP10]] to <4 x float>*
911; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD11]], <4 x float>* [[TMP11]], align 4
912; VEC4_INTERL2-NEXT:    [[TMP12:%.*]] = fadd fast <4 x float> [[VEC_IND10]], [[BROADCAST_SPLAT]]
913; VEC4_INTERL2-NEXT:    [[TMP13:%.*]] = fadd fast <4 x float> [[STEP_ADD11]], [[BROADCAST_SPLAT15]]
914; VEC4_INTERL2-NEXT:    [[TMP14:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01, float -5.000000e-01, float -5.000000e-01>
915; VEC4_INTERL2-NEXT:    [[TMP15:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float -2.500000e+00, float -2.500000e+00, float -2.500000e+00, float -2.500000e+00>
916; VEC4_INTERL2-NEXT:    [[TMP16:%.*]] = fadd fast <4 x float> [[TMP14]], [[TMP12]]
917; VEC4_INTERL2-NEXT:    [[TMP17:%.*]] = fadd fast <4 x float> [[TMP15]], [[TMP13]]
918; VEC4_INTERL2-NEXT:    [[TMP18:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
919; VEC4_INTERL2-NEXT:    [[TMP19:%.*]] = bitcast float* [[TMP18]] to <4 x float>*
920; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP16]], <4 x float>* [[TMP19]], align 4
921; VEC4_INTERL2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 4
922; VEC4_INTERL2-NEXT:    [[TMP21:%.*]] = bitcast float* [[TMP20]] to <4 x float>*
923; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP17]], <4 x float>* [[TMP21]], align 4
924; VEC4_INTERL2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]]
925; VEC4_INTERL2-NEXT:    [[TMP23:%.*]] = bitcast float* [[TMP22]] to <4 x float>*
926; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP14]], <4 x float>* [[TMP23]], align 4
927; VEC4_INTERL2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 4
928; VEC4_INTERL2-NEXT:    [[TMP25:%.*]] = bitcast float* [[TMP24]] to <4 x float>*
929; VEC4_INTERL2-NEXT:    store <4 x float> [[TMP15]], <4 x float>* [[TMP25]], align 4
930; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
931; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float -4.000000e+00, float -4.000000e+00, float -4.000000e+00, float -4.000000e+00>
932; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT13]] = fadd fast <4 x float> [[STEP_ADD11]], [[DOTSPLAT9]]
933; VEC4_INTERL2-NEXT:    [[TMP26:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
934; VEC4_INTERL2-NEXT:    br i1 [[TMP26]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
935; VEC4_INTERL2:       middle.block:
936; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
937; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
938; VEC4_INTERL2:       scalar.ph:
939; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
940; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ]
941; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi float [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
942; VEC4_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
943; VEC4_INTERL2:       for.body:
944; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
945; VEC4_INTERL2-NEXT:    [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ]
946; VEC4_INTERL2-NEXT:    [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
947; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
948; VEC4_INTERL2-NEXT:    store float [[X_011]], float* [[ARRAYIDX]], align 4
949; VEC4_INTERL2-NEXT:    [[ADD]] = fadd fast float [[X_011]], [[TMP0]]
950; VEC4_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01
951; VEC4_INTERL2-NEXT:    [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]]
952; VEC4_INTERL2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
953; VEC4_INTERL2-NEXT:    store float [[ADD2]], float* [[ARRAYIDX4]], align 4
954; VEC4_INTERL2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]]
955; VEC4_INTERL2-NEXT:    store float [[CONV1]], float* [[ARRAYIDX6]], align 4
956; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
957; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
958; VEC4_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
959; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
960; VEC4_INTERL2:       for.end.loopexit:
961; VEC4_INTERL2-NEXT:    br label [[FOR_END]]
962; VEC4_INTERL2:       for.end:
963; VEC4_INTERL2-NEXT:    ret void
964;
965; VEC1_INTERL2-LABEL: @fp_iv_loop3(
966; VEC1_INTERL2-NEXT:  entry:
967; VEC1_INTERL2-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
968; VEC1_INTERL2-NEXT:    br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
969; VEC1_INTERL2:       for.body.lr.ph:
970; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = load float, float* @fp_inc, align 4
971; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -1
972; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
973; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
974; VEC1_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP1]], 0
975; VEC1_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
976; VEC1_INTERL2:       vector.ph:
977; VEC1_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934590
978; VEC1_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
979; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01
980; VEC1_INTERL2-NEXT:    [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000
981; VEC1_INTERL2-NEXT:    [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float
982; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]]
983; VEC1_INTERL2-NEXT:    [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]]
984; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
985; VEC1_INTERL2:       vector.body:
986; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
987; VEC1_INTERL2-NEXT:    [[INDUCTION5:%.*]] = or i64 [[INDEX]], 1
988; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = sitofp i64 [[INDEX]] to float
989; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = fmul fast float [[TMP6]], -5.000000e-01
990; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = sitofp i64 [[INDEX]] to float
991; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = fmul fast float [[TMP0]], [[TMP8]]
992; VEC1_INTERL2-NEXT:    [[OFFSET_IDX8:%.*]] = fadd fast float [[TMP9]], [[INIT]]
993; VEC1_INTERL2-NEXT:    [[TMP10:%.*]] = fadd fast float [[OFFSET_IDX8]], [[TMP0]]
994; VEC1_INTERL2-NEXT:    [[TMP11:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
995; VEC1_INTERL2-NEXT:    [[TMP12:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION5]]
996; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX8]], float* [[TMP11]], align 4
997; VEC1_INTERL2-NEXT:    store float [[TMP10]], float* [[TMP12]], align 4
998; VEC1_INTERL2-NEXT:    [[TMP13:%.*]] = fadd fast float [[OFFSET_IDX8]], [[TMP0]]
999; VEC1_INTERL2-NEXT:    [[TMP14:%.*]] = fadd fast float [[TMP10]], [[TMP0]]
1000; VEC1_INTERL2-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP7]], 0xBFD99999A0000000
1001; VEC1_INTERL2-NEXT:    [[TMP16:%.*]] = fadd fast float [[TMP7]], 0xBFECCCCCC0000000
1002; VEC1_INTERL2-NEXT:    [[TMP17:%.*]] = fadd fast float [[TMP15]], [[TMP13]]
1003; VEC1_INTERL2-NEXT:    [[TMP18:%.*]] = fadd fast float [[TMP16]], [[TMP14]]
1004; VEC1_INTERL2-NEXT:    [[TMP19:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
1005; VEC1_INTERL2-NEXT:    [[TMP20:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDUCTION5]]
1006; VEC1_INTERL2-NEXT:    store float [[TMP17]], float* [[TMP19]], align 4
1007; VEC1_INTERL2-NEXT:    store float [[TMP18]], float* [[TMP20]], align 4
1008; VEC1_INTERL2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]]
1009; VEC1_INTERL2-NEXT:    [[TMP22:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDUCTION5]]
1010; VEC1_INTERL2-NEXT:    store float [[TMP15]], float* [[TMP21]], align 4
1011; VEC1_INTERL2-NEXT:    store float [[TMP16]], float* [[TMP22]], align 4
1012; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1013; VEC1_INTERL2-NEXT:    [[TMP23:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1014; VEC1_INTERL2-NEXT:    br i1 [[TMP23]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]]
1015; VEC1_INTERL2:       middle.block:
1016; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1017; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
1018; VEC1_INTERL2:       scalar.ph:
1019; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
1020; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ]
1021; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL2:%.*]] = phi float [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
1022; VEC1_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
1023; VEC1_INTERL2:       for.body:
1024; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ]
1025; VEC1_INTERL2-NEXT:    [[Y_012:%.*]] = phi float [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ], [ [[CONV1:%.*]], [[FOR_BODY]] ]
1026; VEC1_INTERL2-NEXT:    [[X_011:%.*]] = phi float [ [[BC_RESUME_VAL2]], [[SCALAR_PH]] ], [ [[ADD:%.*]], [[FOR_BODY]] ]
1027; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
1028; VEC1_INTERL2-NEXT:    store float [[X_011]], float* [[ARRAYIDX]], align 4
1029; VEC1_INTERL2-NEXT:    [[ADD]] = fadd fast float [[X_011]], [[TMP0]]
1030; VEC1_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01
1031; VEC1_INTERL2-NEXT:    [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]]
1032; VEC1_INTERL2-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
1033; VEC1_INTERL2-NEXT:    store float [[ADD2]], float* [[ARRAYIDX4]], align 4
1034; VEC1_INTERL2-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]]
1035; VEC1_INTERL2-NEXT:    store float [[CONV1]], float* [[ARRAYIDX6]], align 4
1036; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1037; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
1038; VEC1_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
1039; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
1040; VEC1_INTERL2:       for.end.loopexit:
1041; VEC1_INTERL2-NEXT:    br label [[FOR_END]]
1042; VEC1_INTERL2:       for.end:
1043; VEC1_INTERL2-NEXT:    ret void
1044;
1045; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop3(
1046; VEC2_INTERL1_PRED_STORE-NEXT:  entry:
1047; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP9:%.*]] = icmp sgt i32 [[N:%.*]], 0
1048; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP9]], label [[FOR_BODY_LR_PH:%.*]], label [[FOR_END:%.*]]
1049; VEC2_INTERL1_PRED_STORE:       for.body.lr.ph:
1050; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = load float, float* @fp_inc, align 4
1051; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = add i32 [[N]], -1
1052; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = zext i32 [[TMP1]] to i64
1053; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = add nuw nsw i64 [[TMP2]], 1
1054; VEC2_INTERL1_PRED_STORE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP1]], 0
1055; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
1056; VEC2_INTERL1_PRED_STORE:       vector.ph:
1057; VEC2_INTERL1_PRED_STORE-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP3]], 8589934590
1058; VEC2_INTERL1_PRED_STORE-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
1059; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = fmul fast float [[CAST_CRD]], -5.000000e-01
1060; VEC2_INTERL1_PRED_STORE-NEXT:    [[IND_END:%.*]] = fadd fast float [[TMP4]], 0x3FB99999A0000000
1061; VEC2_INTERL1_PRED_STORE-NEXT:    [[CAST_CRD3:%.*]] = sitofp i64 [[N_VEC]] to float
1062; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP0]], [[CAST_CRD3]]
1063; VEC2_INTERL1_PRED_STORE-NEXT:    [[IND_END4:%.*]] = fadd fast float [[TMP5]], [[INIT:%.*]]
1064; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[INIT]], i64 0
1065; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLAT:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
1066; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLATINSERT5:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
1067; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLAT6:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT5]], <2 x float> poison, <2 x i32> zeroinitializer
1068; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = fmul fast <2 x float> [[DOTSPLAT6]], <float 0.000000e+00, float 1.000000e+00>
1069; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDUCTION:%.*]] = fadd fast <2 x float> [[DOTSPLAT]], [[TMP6]]
1070; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP7:%.*]] = fmul fast float [[TMP0]], 2.000000e+00
1071; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLATINSERT7:%.*]] = insertelement <2 x float> poison, float [[TMP7]], i64 0
1072; VEC2_INTERL1_PRED_STORE-NEXT:    [[DOTSPLAT8:%.*]] = shufflevector <2 x float> [[DOTSPLATINSERT7]], <2 x float> poison, <2 x i32> zeroinitializer
1073; VEC2_INTERL1_PRED_STORE-NEXT:    [[BROADCAST_SPLATINSERT:%.*]] = insertelement <2 x float> poison, float [[TMP0]], i64 0
1074; VEC2_INTERL1_PRED_STORE-NEXT:    [[BROADCAST_SPLAT:%.*]] = shufflevector <2 x float> [[BROADCAST_SPLATINSERT]], <2 x float> poison, <2 x i32> zeroinitializer
1075; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[VECTOR_BODY:%.*]]
1076; VEC2_INTERL1_PRED_STORE:       vector.body:
1077; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1078; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND:%.*]] = phi <2 x float> [ <float 0x3FB99999A0000000, float 0xBFD99999A0000000>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1079; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND9:%.*]] = phi <2 x float> [ [[INDUCTION]], [[VECTOR_PH]] ], [ [[VEC_IND_NEXT10:%.*]], [[VECTOR_BODY]] ]
1080; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
1081; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <2 x float>*
1082; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND9]], <2 x float>* [[TMP9]], align 4
1083; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP10:%.*]] = fadd fast <2 x float> [[VEC_IND9]], [[BROADCAST_SPLAT]]
1084; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP11:%.*]] = fadd fast <2 x float> [[VEC_IND]], <float -5.000000e-01, float -5.000000e-01>
1085; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP12:%.*]] = fadd fast <2 x float> [[TMP11]], [[TMP10]]
1086; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
1087; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP14:%.*]] = bitcast float* [[TMP13]] to <2 x float>*
1088; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[TMP12]], <2 x float>* [[TMP14]], align 4
1089; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP15:%.*]] = getelementptr inbounds float, float* [[C:%.*]], i64 [[INDEX]]
1090; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP16:%.*]] = bitcast float* [[TMP15]] to <2 x float>*
1091; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[TMP11]], <2 x float>* [[TMP16]], align 4
1092; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1093; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float -1.000000e+00, float -1.000000e+00>
1094; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT10]] = fadd fast <2 x float> [[VEC_IND9]], [[DOTSPLAT8]]
1095; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1096; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP8:![0-9]+]]
1097; VEC2_INTERL1_PRED_STORE:       middle.block:
1098; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP3]], [[N_VEC]]
1099; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]]
1100; VEC2_INTERL1_PRED_STORE:       for.body:
1101; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_LR_PH]] ]
1102; VEC2_INTERL1_PRED_STORE-NEXT:    [[Y_012:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 0x3FB99999A0000000, [[FOR_BODY_LR_PH]] ]
1103; VEC2_INTERL1_PRED_STORE-NEXT:    [[X_011:%.*]] = phi float [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[IND_END4]], [[MIDDLE_BLOCK]] ], [ [[INIT]], [[FOR_BODY_LR_PH]] ]
1104; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
1105; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_011]], float* [[ARRAYIDX]], align 4
1106; VEC2_INTERL1_PRED_STORE-NEXT:    [[ADD]] = fadd fast float [[X_011]], [[TMP0]]
1107; VEC2_INTERL1_PRED_STORE-NEXT:    [[CONV1]] = fadd fast float [[Y_012]], -5.000000e-01
1108; VEC2_INTERL1_PRED_STORE-NEXT:    [[ADD2:%.*]] = fadd fast float [[CONV1]], [[ADD]]
1109; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]]
1110; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[ADD2]], float* [[ARRAYIDX4]], align 4
1111; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[C]], i64 [[INDVARS_IV]]
1112; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[CONV1]], float* [[ARRAYIDX6]], align 4
1113; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1114; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
1115; VEC2_INTERL1_PRED_STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
1116; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
1117; VEC2_INTERL1_PRED_STORE:       for.end:
1118; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
1119;
1120entry:
1121  %cmp9 = icmp sgt i32 %N, 0
1122  br i1 %cmp9, label %for.body.lr.ph, label %for.end
1123
1124for.body.lr.ph:                                   ; preds = %entry
1125  %0 = load float, float* @fp_inc, align 4
1126  br label %for.body
1127
1128for.body:                                         ; preds = %for.body, %for.body.lr.ph
1129  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
1130  %y.012 = phi float [ 0x3FB99999A0000000, %for.body.lr.ph ], [ %conv1, %for.body ]
1131  %x.011 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
1132  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
1133  store float %x.011, float* %arrayidx, align 4
1134  %add = fadd fast float %x.011, %0
1135  %conv1 = fadd fast float %y.012, -5.000000e-01
1136  %add2 = fadd fast float %conv1, %add
1137  %arrayidx4 = getelementptr inbounds float, float* %B, i64 %indvars.iv
1138  store float %add2, float* %arrayidx4, align 4
1139  %arrayidx6 = getelementptr inbounds float, float* %C, i64 %indvars.iv
1140  store float %conv1, float* %arrayidx6, align 4
1141  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1142  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1143  %exitcond = icmp eq i32 %lftr.wideiv, %N
1144  br i1 %exitcond, label %for.end.loopexit, label %for.body
1145
1146for.end.loopexit:
1147  br label %for.end
1148
1149for.end:
1150  ret void
1151}
1152
1153; Start and step values are constants. There is no 'fmul' operation in this case
1154;void fp_iv_loop4(float * __restrict__ A, int N) {
1155;  float x = 1.0;
1156;  for (int i=0; i < N; ++i) {
1157;    A[i] = x;
1158;    x += 0.5;
1159;  }
1160;}
1161
1162
1163define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
1164; VEC4_INTERL1-LABEL: @fp_iv_loop4(
1165; VEC4_INTERL1-NEXT:  entry:
1166; VEC4_INTERL1-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
1167; VEC4_INTERL1-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
1168; VEC4_INTERL1:       for.body.preheader:
1169; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
1170; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1171; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1172; VEC4_INTERL1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
1173; VEC4_INTERL1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1174; VEC4_INTERL1:       vector.ph:
1175; VEC4_INTERL1-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
1176; VEC4_INTERL1-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
1177; VEC4_INTERL1-NEXT:    [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01
1178; VEC4_INTERL1-NEXT:    [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00
1179; VEC4_INTERL1-NEXT:    br label [[VECTOR_BODY:%.*]]
1180; VEC4_INTERL1:       vector.body:
1181; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1182; VEC4_INTERL1-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1183; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
1184; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
1185; VEC4_INTERL1-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4
1186; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1187; VEC4_INTERL1-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
1188; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1189; VEC4_INTERL1-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
1190; VEC4_INTERL1:       middle.block:
1191; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1192; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
1193; VEC4_INTERL1:       scalar.ph:
1194; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
1195; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ]
1196; VEC4_INTERL1-NEXT:    br label [[FOR_BODY:%.*]]
1197; VEC4_INTERL1:       for.body:
1198; VEC4_INTERL1-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1199; VEC4_INTERL1-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
1200; VEC4_INTERL1-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
1201; VEC4_INTERL1-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
1202; VEC4_INTERL1-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
1203; VEC4_INTERL1-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1204; VEC4_INTERL1-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
1205; VEC4_INTERL1-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
1206; VEC4_INTERL1-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
1207; VEC4_INTERL1:       for.end.loopexit:
1208; VEC4_INTERL1-NEXT:    br label [[FOR_END]]
1209; VEC4_INTERL1:       for.end:
1210; VEC4_INTERL1-NEXT:    ret void
1211;
1212; VEC4_INTERL2-LABEL: @fp_iv_loop4(
1213; VEC4_INTERL2-NEXT:  entry:
1214; VEC4_INTERL2-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
1215; VEC4_INTERL2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
1216; VEC4_INTERL2:       for.body.preheader:
1217; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
1218; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1219; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1220; VEC4_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 7
1221; VEC4_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1222; VEC4_INTERL2:       vector.ph:
1223; VEC4_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934584
1224; VEC4_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
1225; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01
1226; VEC4_INTERL2-NEXT:    [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00
1227; VEC4_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
1228; VEC4_INTERL2:       vector.body:
1229; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1230; VEC4_INTERL2-NEXT:    [[VEC_IND:%.*]] = phi <4 x float> [ <float 1.000000e+00, float 1.500000e+00, float 2.000000e+00, float 2.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1231; VEC4_INTERL2-NEXT:    [[STEP_ADD:%.*]] = fadd fast <4 x float> [[VEC_IND]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
1232; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
1233; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
1234; VEC4_INTERL2-NEXT:    store <4 x float> [[VEC_IND]], <4 x float>* [[TMP5]], align 4
1235; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP4]], i64 4
1236; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = bitcast float* [[TMP6]] to <4 x float>*
1237; VEC4_INTERL2-NEXT:    store <4 x float> [[STEP_ADD]], <4 x float>* [[TMP7]], align 4
1238; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1239; VEC4_INTERL2-NEXT:    [[VEC_IND_NEXT]] = fadd fast <4 x float> [[VEC_IND]], <float 4.000000e+00, float 4.000000e+00, float 4.000000e+00, float 4.000000e+00>
1240; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1241; VEC4_INTERL2-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
1242; VEC4_INTERL2:       middle.block:
1243; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1244; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
1245; VEC4_INTERL2:       scalar.ph:
1246; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
1247; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ]
1248; VEC4_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
1249; VEC4_INTERL2:       for.body:
1250; VEC4_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1251; VEC4_INTERL2-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
1252; VEC4_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
1253; VEC4_INTERL2-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
1254; VEC4_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
1255; VEC4_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1256; VEC4_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
1257; VEC4_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
1258; VEC4_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
1259; VEC4_INTERL2:       for.end.loopexit:
1260; VEC4_INTERL2-NEXT:    br label [[FOR_END]]
1261; VEC4_INTERL2:       for.end:
1262; VEC4_INTERL2-NEXT:    ret void
1263;
1264; VEC1_INTERL2-LABEL: @fp_iv_loop4(
1265; VEC1_INTERL2-NEXT:  entry:
1266; VEC1_INTERL2-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
1267; VEC1_INTERL2-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
1268; VEC1_INTERL2:       for.body.preheader:
1269; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
1270; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1271; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1272; VEC1_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
1273; VEC1_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1274; VEC1_INTERL2:       vector.ph:
1275; VEC1_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
1276; VEC1_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
1277; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01
1278; VEC1_INTERL2-NEXT:    [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00
1279; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
1280; VEC1_INTERL2:       vector.body:
1281; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1282; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
1283; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = sitofp i64 [[INDEX]] to float
1284; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fmul fast float [[TMP4]], 5.000000e-01
1285; VEC1_INTERL2-NEXT:    [[OFFSET_IDX:%.*]] = fadd fast float [[TMP5]], 1.000000e+00
1286; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fadd fast float [[TMP5]], 1.500000e+00
1287; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
1288; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
1289; VEC1_INTERL2-NEXT:    store float [[OFFSET_IDX]], float* [[TMP7]], align 4
1290; VEC1_INTERL2-NEXT:    store float [[TMP6]], float* [[TMP8]], align 4
1291; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1292; VEC1_INTERL2-NEXT:    [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1293; VEC1_INTERL2-NEXT:    br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP9:![0-9]+]]
1294; VEC1_INTERL2:       middle.block:
1295; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1296; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
1297; VEC1_INTERL2:       scalar.ph:
1298; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
1299; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ]
1300; VEC1_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
1301; VEC1_INTERL2:       for.body:
1302; VEC1_INTERL2-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1303; VEC1_INTERL2-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
1304; VEC1_INTERL2-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
1305; VEC1_INTERL2-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
1306; VEC1_INTERL2-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
1307; VEC1_INTERL2-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1308; VEC1_INTERL2-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
1309; VEC1_INTERL2-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
1310; VEC1_INTERL2-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
1311; VEC1_INTERL2:       for.end.loopexit:
1312; VEC1_INTERL2-NEXT:    br label [[FOR_END]]
1313; VEC1_INTERL2:       for.end:
1314; VEC1_INTERL2-NEXT:    ret void
1315;
1316; VEC2_INTERL1_PRED_STORE-LABEL: @fp_iv_loop4(
1317; VEC2_INTERL1_PRED_STORE-NEXT:  entry:
1318; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP4:%.*]] = icmp sgt i32 [[N:%.*]], 0
1319; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP4]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
1320; VEC2_INTERL1_PRED_STORE:       for.body.preheader:
1321; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
1322; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
1323; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
1324; VEC2_INTERL1_PRED_STORE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp eq i32 [[TMP0]], 0
1325; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
1326; VEC2_INTERL1_PRED_STORE:       vector.ph:
1327; VEC2_INTERL1_PRED_STORE-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934590
1328; VEC2_INTERL1_PRED_STORE-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
1329; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = fmul fast float [[CAST_CRD]], 5.000000e-01
1330; VEC2_INTERL1_PRED_STORE-NEXT:    [[IND_END:%.*]] = fadd fast float [[TMP3]], 1.000000e+00
1331; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[VECTOR_BODY:%.*]]
1332; VEC2_INTERL1_PRED_STORE:       vector.body:
1333; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
1334; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND:%.*]] = phi <2 x float> [ <float 1.000000e+00, float 1.500000e+00>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[VECTOR_BODY]] ]
1335; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
1336; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <2 x float>*
1337; VEC2_INTERL1_PRED_STORE-NEXT:    store <2 x float> [[VEC_IND]], <2 x float>* [[TMP5]], align 4
1338; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1339; VEC2_INTERL1_PRED_STORE-NEXT:    [[VEC_IND_NEXT]] = fadd fast <2 x float> [[VEC_IND]], <float 1.000000e+00, float 1.000000e+00>
1340; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1341; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP10:![0-9]+]]
1342; VEC2_INTERL1_PRED_STORE:       middle.block:
1343; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
1344; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END]], label [[FOR_BODY]]
1345; VEC2_INTERL1_PRED_STORE:       for.body:
1346; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
1347; VEC2_INTERL1_PRED_STORE-NEXT:    [[X_06:%.*]] = phi float [ [[CONV1:%.*]], [[FOR_BODY]] ], [ [[IND_END]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[FOR_BODY_PREHEADER]] ]
1348; VEC2_INTERL1_PRED_STORE-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]]
1349; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[X_06]], float* [[ARRAYIDX]], align 4
1350; VEC2_INTERL1_PRED_STORE-NEXT:    [[CONV1]] = fadd fast float [[X_06]], 5.000000e-01
1351; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
1352; VEC2_INTERL1_PRED_STORE-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
1353; VEC2_INTERL1_PRED_STORE-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
1354; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[EXITCOND]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
1355; VEC2_INTERL1_PRED_STORE:       for.end:
1356; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
1357;
1358entry:
1359  %cmp4 = icmp sgt i32 %N, 0
1360  br i1 %cmp4, label %for.body.preheader, label %for.end
1361
1362for.body.preheader:                               ; preds = %entry
1363  br label %for.body
1364
1365for.body:                                         ; preds = %for.body.preheader, %for.body
1366  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
1367  %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
1368  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
1369  store float %x.06, float* %arrayidx, align 4
1370  %conv1 = fadd fast float %x.06, 5.000000e-01
1371  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
1372  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
1373  %exitcond = icmp eq i32 %lftr.wideiv, %N
1374  br i1 %exitcond, label %for.end.loopexit, label %for.body
1375
1376for.end.loopexit:                                 ; preds = %for.body
1377  br label %for.end
1378
1379for.end:                                          ; preds = %for.end.loopexit, %entry
1380  ret void
1381}
1382
1383
1384define void @non_primary_iv_float_scalar(float* %A, i64 %N) {
1385; VEC4_INTERL1-LABEL: @non_primary_iv_float_scalar(
1386; VEC4_INTERL1-NEXT:  entry:
1387; VEC4_INTERL1-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1388; VEC4_INTERL1-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 4
1389; VEC4_INTERL1-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1390; VEC4_INTERL1:       vector.ph:
1391; VEC4_INTERL1-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775804
1392; VEC4_INTERL1-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
1393; VEC4_INTERL1-NEXT:    br label [[VECTOR_BODY:%.*]]
1394; VEC4_INTERL1:       vector.body:
1395; VEC4_INTERL1-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE7:%.*]] ]
1396; VEC4_INTERL1-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
1397; VEC4_INTERL1-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
1398; VEC4_INTERL1-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to <4 x float>*
1399; VEC4_INTERL1-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
1400; VEC4_INTERL1-NEXT:    [[TMP3:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
1401; VEC4_INTERL1-NEXT:    [[TMP4:%.*]] = extractelement <4 x i1> [[TMP3]], i64 0
1402; VEC4_INTERL1-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
1403; VEC4_INTERL1:       pred.store.if:
1404; VEC4_INTERL1-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
1405; VEC4_INTERL1-NEXT:    store float [[TMP0]], float* [[TMP5]], align 4
1406; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE]]
1407; VEC4_INTERL1:       pred.store.continue:
1408; VEC4_INTERL1-NEXT:    [[TMP6:%.*]] = extractelement <4 x i1> [[TMP3]], i64 1
1409; VEC4_INTERL1-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3:%.*]]
1410; VEC4_INTERL1:       pred.store.if2:
1411; VEC4_INTERL1-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
1412; VEC4_INTERL1-NEXT:    [[TMP8:%.*]] = or i64 [[INDEX]], 1
1413; VEC4_INTERL1-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]]
1414; VEC4_INTERL1-NEXT:    store float [[TMP7]], float* [[TMP9]], align 4
1415; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE3]]
1416; VEC4_INTERL1:       pred.store.continue3:
1417; VEC4_INTERL1-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP3]], i64 2
1418; VEC4_INTERL1-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
1419; VEC4_INTERL1:       pred.store.if4:
1420; VEC4_INTERL1-NEXT:    [[TMP11:%.*]] = fadd fast float [[TMP0]], 2.000000e+00
1421; VEC4_INTERL1-NEXT:    [[TMP12:%.*]] = or i64 [[INDEX]], 2
1422; VEC4_INTERL1-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
1423; VEC4_INTERL1-NEXT:    store float [[TMP11]], float* [[TMP13]], align 4
1424; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE5]]
1425; VEC4_INTERL1:       pred.store.continue5:
1426; VEC4_INTERL1-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP3]], i64 3
1427; VEC4_INTERL1-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7]]
1428; VEC4_INTERL1:       pred.store.if6:
1429; VEC4_INTERL1-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP0]], 3.000000e+00
1430; VEC4_INTERL1-NEXT:    [[TMP16:%.*]] = or i64 [[INDEX]], 3
1431; VEC4_INTERL1-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
1432; VEC4_INTERL1-NEXT:    store float [[TMP15]], float* [[TMP17]], align 4
1433; VEC4_INTERL1-NEXT:    br label [[PRED_STORE_CONTINUE7]]
1434; VEC4_INTERL1:       pred.store.continue7:
1435; VEC4_INTERL1-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
1436; VEC4_INTERL1-NEXT:    [[TMP18:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1437; VEC4_INTERL1-NEXT:    br i1 [[TMP18]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
1438; VEC4_INTERL1:       middle.block:
1439; VEC4_INTERL1-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
1440; VEC4_INTERL1-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1441; VEC4_INTERL1:       scalar.ph:
1442; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1443; VEC4_INTERL1-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
1444; VEC4_INTERL1-NEXT:    br label [[FOR_BODY:%.*]]
1445; VEC4_INTERL1:       for.body:
1446; VEC4_INTERL1-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1447; VEC4_INTERL1-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
1448; VEC4_INTERL1-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
1449; VEC4_INTERL1-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
1450; VEC4_INTERL1-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
1451; VEC4_INTERL1-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
1452; VEC4_INTERL1:       if.pred:
1453; VEC4_INTERL1-NEXT:    store float [[J]], float* [[VAR0]], align 4
1454; VEC4_INTERL1-NEXT:    br label [[FOR_INC]]
1455; VEC4_INTERL1:       for.inc:
1456; VEC4_INTERL1-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1457; VEC4_INTERL1-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
1458; VEC4_INTERL1-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1459; VEC4_INTERL1-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP13:![0-9]+]]
1460; VEC4_INTERL1:       for.end:
1461; VEC4_INTERL1-NEXT:    ret void
1462;
1463; VEC4_INTERL2-LABEL: @non_primary_iv_float_scalar(
1464; VEC4_INTERL2-NEXT:  entry:
1465; VEC4_INTERL2-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1466; VEC4_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 8
1467; VEC4_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1468; VEC4_INTERL2:       vector.ph:
1469; VEC4_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775800
1470; VEC4_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
1471; VEC4_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
1472; VEC4_INTERL2:       vector.body:
1473; VEC4_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE17:%.*]] ]
1474; VEC4_INTERL2-NEXT:    [[TMP0:%.*]] = or i64 [[INDEX]], 4
1475; VEC4_INTERL2-NEXT:    [[TMP1:%.*]] = sitofp i64 [[INDEX]] to float
1476; VEC4_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
1477; VEC4_INTERL2-NEXT:    [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
1478; VEC4_INTERL2-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
1479; VEC4_INTERL2-NEXT:    [[TMP4:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 4
1480; VEC4_INTERL2-NEXT:    [[TMP5:%.*]] = bitcast float* [[TMP4]] to <4 x float>*
1481; VEC4_INTERL2-NEXT:    [[WIDE_LOAD3:%.*]] = load <4 x float>, <4 x float>* [[TMP5]], align 4
1482; VEC4_INTERL2-NEXT:    [[TMP6:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD]], zeroinitializer
1483; VEC4_INTERL2-NEXT:    [[TMP7:%.*]] = fcmp fast oeq <4 x float> [[WIDE_LOAD3]], zeroinitializer
1484; VEC4_INTERL2-NEXT:    [[TMP8:%.*]] = extractelement <4 x i1> [[TMP6]], i64 0
1485; VEC4_INTERL2-NEXT:    br i1 [[TMP8]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
1486; VEC4_INTERL2:       pred.store.if:
1487; VEC4_INTERL2-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
1488; VEC4_INTERL2-NEXT:    store float [[TMP1]], float* [[TMP9]], align 4
1489; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE]]
1490; VEC4_INTERL2:       pred.store.continue:
1491; VEC4_INTERL2-NEXT:    [[TMP10:%.*]] = extractelement <4 x i1> [[TMP6]], i64 1
1492; VEC4_INTERL2-NEXT:    br i1 [[TMP10]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5:%.*]]
1493; VEC4_INTERL2:       pred.store.if4:
1494; VEC4_INTERL2-NEXT:    [[TMP11:%.*]] = fadd fast float [[TMP1]], 1.000000e+00
1495; VEC4_INTERL2-NEXT:    [[TMP12:%.*]] = or i64 [[INDEX]], 1
1496; VEC4_INTERL2-NEXT:    [[TMP13:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP12]]
1497; VEC4_INTERL2-NEXT:    store float [[TMP11]], float* [[TMP13]], align 4
1498; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
1499; VEC4_INTERL2:       pred.store.continue5:
1500; VEC4_INTERL2-NEXT:    [[TMP14:%.*]] = extractelement <4 x i1> [[TMP6]], i64 2
1501; VEC4_INTERL2-NEXT:    br i1 [[TMP14]], label [[PRED_STORE_IF6:%.*]], label [[PRED_STORE_CONTINUE7:%.*]]
1502; VEC4_INTERL2:       pred.store.if6:
1503; VEC4_INTERL2-NEXT:    [[TMP15:%.*]] = fadd fast float [[TMP1]], 2.000000e+00
1504; VEC4_INTERL2-NEXT:    [[TMP16:%.*]] = or i64 [[INDEX]], 2
1505; VEC4_INTERL2-NEXT:    [[TMP17:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP16]]
1506; VEC4_INTERL2-NEXT:    store float [[TMP15]], float* [[TMP17]], align 4
1507; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE7]]
1508; VEC4_INTERL2:       pred.store.continue7:
1509; VEC4_INTERL2-NEXT:    [[TMP18:%.*]] = extractelement <4 x i1> [[TMP6]], i64 3
1510; VEC4_INTERL2-NEXT:    br i1 [[TMP18]], label [[PRED_STORE_IF8:%.*]], label [[PRED_STORE_CONTINUE9:%.*]]
1511; VEC4_INTERL2:       pred.store.if8:
1512; VEC4_INTERL2-NEXT:    [[TMP19:%.*]] = fadd fast float [[TMP1]], 3.000000e+00
1513; VEC4_INTERL2-NEXT:    [[TMP20:%.*]] = or i64 [[INDEX]], 3
1514; VEC4_INTERL2-NEXT:    [[TMP21:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP20]]
1515; VEC4_INTERL2-NEXT:    store float [[TMP19]], float* [[TMP21]], align 4
1516; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE9]]
1517; VEC4_INTERL2:       pred.store.continue9:
1518; VEC4_INTERL2-NEXT:    [[TMP22:%.*]] = extractelement <4 x i1> [[TMP7]], i64 0
1519; VEC4_INTERL2-NEXT:    br i1 [[TMP22]], label [[PRED_STORE_IF10:%.*]], label [[PRED_STORE_CONTINUE11:%.*]]
1520; VEC4_INTERL2:       pred.store.if10:
1521; VEC4_INTERL2-NEXT:    [[TMP23:%.*]] = fadd fast float [[TMP1]], 4.000000e+00
1522; VEC4_INTERL2-NEXT:    [[TMP24:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP0]]
1523; VEC4_INTERL2-NEXT:    store float [[TMP23]], float* [[TMP24]], align 4
1524; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE11]]
1525; VEC4_INTERL2:       pred.store.continue11:
1526; VEC4_INTERL2-NEXT:    [[TMP25:%.*]] = extractelement <4 x i1> [[TMP7]], i64 1
1527; VEC4_INTERL2-NEXT:    br i1 [[TMP25]], label [[PRED_STORE_IF12:%.*]], label [[PRED_STORE_CONTINUE13:%.*]]
1528; VEC4_INTERL2:       pred.store.if12:
1529; VEC4_INTERL2-NEXT:    [[TMP26:%.*]] = fadd fast float [[TMP1]], 5.000000e+00
1530; VEC4_INTERL2-NEXT:    [[TMP27:%.*]] = or i64 [[INDEX]], 5
1531; VEC4_INTERL2-NEXT:    [[TMP28:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP27]]
1532; VEC4_INTERL2-NEXT:    store float [[TMP26]], float* [[TMP28]], align 4
1533; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE13]]
1534; VEC4_INTERL2:       pred.store.continue13:
1535; VEC4_INTERL2-NEXT:    [[TMP29:%.*]] = extractelement <4 x i1> [[TMP7]], i64 2
1536; VEC4_INTERL2-NEXT:    br i1 [[TMP29]], label [[PRED_STORE_IF14:%.*]], label [[PRED_STORE_CONTINUE15:%.*]]
1537; VEC4_INTERL2:       pred.store.if14:
1538; VEC4_INTERL2-NEXT:    [[TMP30:%.*]] = fadd fast float [[TMP1]], 6.000000e+00
1539; VEC4_INTERL2-NEXT:    [[TMP31:%.*]] = or i64 [[INDEX]], 6
1540; VEC4_INTERL2-NEXT:    [[TMP32:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP31]]
1541; VEC4_INTERL2-NEXT:    store float [[TMP30]], float* [[TMP32]], align 4
1542; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE15]]
1543; VEC4_INTERL2:       pred.store.continue15:
1544; VEC4_INTERL2-NEXT:    [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i64 3
1545; VEC4_INTERL2-NEXT:    br i1 [[TMP33]], label [[PRED_STORE_IF16:%.*]], label [[PRED_STORE_CONTINUE17]]
1546; VEC4_INTERL2:       pred.store.if16:
1547; VEC4_INTERL2-NEXT:    [[TMP34:%.*]] = fadd fast float [[TMP1]], 7.000000e+00
1548; VEC4_INTERL2-NEXT:    [[TMP35:%.*]] = or i64 [[INDEX]], 7
1549; VEC4_INTERL2-NEXT:    [[TMP36:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP35]]
1550; VEC4_INTERL2-NEXT:    store float [[TMP34]], float* [[TMP36]], align 4
1551; VEC4_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE17]]
1552; VEC4_INTERL2:       pred.store.continue17:
1553; VEC4_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 8
1554; VEC4_INTERL2-NEXT:    [[TMP37:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1555; VEC4_INTERL2-NEXT:    br i1 [[TMP37]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
1556; VEC4_INTERL2:       middle.block:
1557; VEC4_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
1558; VEC4_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1559; VEC4_INTERL2:       scalar.ph:
1560; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1561; VEC4_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
1562; VEC4_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
1563; VEC4_INTERL2:       for.body:
1564; VEC4_INTERL2-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1565; VEC4_INTERL2-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
1566; VEC4_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
1567; VEC4_INTERL2-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
1568; VEC4_INTERL2-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
1569; VEC4_INTERL2-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
1570; VEC4_INTERL2:       if.pred:
1571; VEC4_INTERL2-NEXT:    store float [[J]], float* [[VAR0]], align 4
1572; VEC4_INTERL2-NEXT:    br label [[FOR_INC]]
1573; VEC4_INTERL2:       for.inc:
1574; VEC4_INTERL2-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1575; VEC4_INTERL2-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
1576; VEC4_INTERL2-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1577; VEC4_INTERL2-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP13:![0-9]+]]
1578; VEC4_INTERL2:       for.end:
1579; VEC4_INTERL2-NEXT:    ret void
1580;
1581; VEC1_INTERL2-LABEL: @non_primary_iv_float_scalar(
1582; VEC1_INTERL2-NEXT:  entry:
1583; VEC1_INTERL2-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1584; VEC1_INTERL2-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
1585; VEC1_INTERL2-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
1586; VEC1_INTERL2:       vector.ph:
1587; VEC1_INTERL2-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
1588; VEC1_INTERL2-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
1589; VEC1_INTERL2-NEXT:    br label [[VECTOR_BODY:%.*]]
1590; VEC1_INTERL2:       vector.body:
1591; VEC1_INTERL2-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE5:%.*]] ]
1592; VEC1_INTERL2-NEXT:    [[INDUCTION2:%.*]] = or i64 [[INDEX]], 1
1593; VEC1_INTERL2-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
1594; VEC1_INTERL2-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
1595; VEC1_INTERL2-NEXT:    [[TMP2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDUCTION2]]
1596; VEC1_INTERL2-NEXT:    [[TMP3:%.*]] = load float, float* [[TMP1]], align 4
1597; VEC1_INTERL2-NEXT:    [[TMP4:%.*]] = load float, float* [[TMP2]], align 4
1598; VEC1_INTERL2-NEXT:    [[TMP5:%.*]] = fcmp fast oeq float [[TMP3]], 0.000000e+00
1599; VEC1_INTERL2-NEXT:    [[TMP6:%.*]] = fcmp fast oeq float [[TMP4]], 0.000000e+00
1600; VEC1_INTERL2-NEXT:    br i1 [[TMP5]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
1601; VEC1_INTERL2:       pred.store.if:
1602; VEC1_INTERL2-NEXT:    store float [[TMP0]], float* [[TMP1]], align 4
1603; VEC1_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE]]
1604; VEC1_INTERL2:       pred.store.continue:
1605; VEC1_INTERL2-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF4:%.*]], label [[PRED_STORE_CONTINUE5]]
1606; VEC1_INTERL2:       pred.store.if4:
1607; VEC1_INTERL2-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
1608; VEC1_INTERL2-NEXT:    store float [[TMP7]], float* [[TMP2]], align 4
1609; VEC1_INTERL2-NEXT:    br label [[PRED_STORE_CONTINUE5]]
1610; VEC1_INTERL2:       pred.store.continue5:
1611; VEC1_INTERL2-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1612; VEC1_INTERL2-NEXT:    [[TMP8:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1613; VEC1_INTERL2-NEXT:    br i1 [[TMP8]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP11:![0-9]+]]
1614; VEC1_INTERL2:       middle.block:
1615; VEC1_INTERL2-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
1616; VEC1_INTERL2-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
1617; VEC1_INTERL2:       scalar.ph:
1618; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1619; VEC1_INTERL2-NEXT:    [[BC_RESUME_VAL1:%.*]] = phi float [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
1620; VEC1_INTERL2-NEXT:    br label [[FOR_BODY:%.*]]
1621; VEC1_INTERL2:       for.body:
1622; VEC1_INTERL2-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
1623; VEC1_INTERL2-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[BC_RESUME_VAL1]], [[SCALAR_PH]] ]
1624; VEC1_INTERL2-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
1625; VEC1_INTERL2-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
1626; VEC1_INTERL2-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
1627; VEC1_INTERL2-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
1628; VEC1_INTERL2:       if.pred:
1629; VEC1_INTERL2-NEXT:    store float [[J]], float* [[VAR0]], align 4
1630; VEC1_INTERL2-NEXT:    br label [[FOR_INC]]
1631; VEC1_INTERL2:       for.inc:
1632; VEC1_INTERL2-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1633; VEC1_INTERL2-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
1634; VEC1_INTERL2-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1635; VEC1_INTERL2-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP12:![0-9]+]]
1636; VEC1_INTERL2:       for.end:
1637; VEC1_INTERL2-NEXT:    ret void
1638;
1639; VEC2_INTERL1_PRED_STORE-LABEL: @non_primary_iv_float_scalar(
1640; VEC2_INTERL1_PRED_STORE-NEXT:  entry:
1641; VEC2_INTERL1_PRED_STORE-NEXT:    [[SMAX:%.*]] = call i64 @llvm.smax.i64(i64 [[N:%.*]], i64 1)
1642; VEC2_INTERL1_PRED_STORE-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[SMAX]], 2
1643; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[FOR_BODY:%.*]], label [[VECTOR_PH:%.*]]
1644; VEC2_INTERL1_PRED_STORE:       vector.ph:
1645; VEC2_INTERL1_PRED_STORE-NEXT:    [[N_VEC:%.*]] = and i64 [[SMAX]], 9223372036854775806
1646; VEC2_INTERL1_PRED_STORE-NEXT:    [[CAST_CRD:%.*]] = sitofp i64 [[N_VEC]] to float
1647; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[VECTOR_BODY:%.*]]
1648; VEC2_INTERL1_PRED_STORE:       vector.body:
1649; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_STORE_CONTINUE3:%.*]] ]
1650; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP0:%.*]] = sitofp i64 [[INDEX]] to float
1651; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
1652; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP2:%.*]] = bitcast float* [[TMP1]] to <2 x float>*
1653; VEC2_INTERL1_PRED_STORE-NEXT:    [[WIDE_LOAD:%.*]] = load <2 x float>, <2 x float>* [[TMP2]], align 4
1654; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP3:%.*]] = fcmp fast oeq <2 x float> [[WIDE_LOAD]], zeroinitializer
1655; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP4:%.*]] = extractelement <2 x i1> [[TMP3]], i64 0
1656; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP4]], label [[PRED_STORE_IF:%.*]], label [[PRED_STORE_CONTINUE:%.*]]
1657; VEC2_INTERL1_PRED_STORE:       pred.store.if:
1658; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDEX]]
1659; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[TMP0]], float* [[TMP5]], align 4
1660; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[PRED_STORE_CONTINUE]]
1661; VEC2_INTERL1_PRED_STORE:       pred.store.continue:
1662; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP6:%.*]] = extractelement <2 x i1> [[TMP3]], i64 1
1663; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP6]], label [[PRED_STORE_IF2:%.*]], label [[PRED_STORE_CONTINUE3]]
1664; VEC2_INTERL1_PRED_STORE:       pred.store.if2:
1665; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP7:%.*]] = fadd fast float [[TMP0]], 1.000000e+00
1666; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP8:%.*]] = or i64 [[INDEX]], 1
1667; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP8]]
1668; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[TMP7]], float* [[TMP9]], align 4
1669; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[PRED_STORE_CONTINUE3]]
1670; VEC2_INTERL1_PRED_STORE:       pred.store.continue3:
1671; VEC2_INTERL1_PRED_STORE-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 2
1672; VEC2_INTERL1_PRED_STORE-NEXT:    [[TMP10:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
1673; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[TMP10]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP12:![0-9]+]]
1674; VEC2_INTERL1_PRED_STORE:       middle.block:
1675; VEC2_INTERL1_PRED_STORE-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[SMAX]], [[N_VEC]]
1676; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[FOR_BODY]]
1677; VEC2_INTERL1_PRED_STORE:       for.body:
1678; VEC2_INTERL1_PRED_STORE-NEXT:    [[I:%.*]] = phi i64 [ [[I_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
1679; VEC2_INTERL1_PRED_STORE-NEXT:    [[J:%.*]] = phi float [ [[J_NEXT:%.*]], [[FOR_INC]] ], [ [[CAST_CRD]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[ENTRY]] ]
1680; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR0:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[I]]
1681; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR1:%.*]] = load float, float* [[VAR0]], align 4
1682; VEC2_INTERL1_PRED_STORE-NEXT:    [[VAR2:%.*]] = fcmp fast oeq float [[VAR1]], 0.000000e+00
1683; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[VAR2]], label [[IF_PRED:%.*]], label [[FOR_INC]]
1684; VEC2_INTERL1_PRED_STORE:       if.pred:
1685; VEC2_INTERL1_PRED_STORE-NEXT:    store float [[J]], float* [[VAR0]], align 4
1686; VEC2_INTERL1_PRED_STORE-NEXT:    br label [[FOR_INC]]
1687; VEC2_INTERL1_PRED_STORE:       for.inc:
1688; VEC2_INTERL1_PRED_STORE-NEXT:    [[I_NEXT]] = add nuw nsw i64 [[I]], 1
1689; VEC2_INTERL1_PRED_STORE-NEXT:    [[J_NEXT]] = fadd fast float [[J]], 1.000000e+00
1690; VEC2_INTERL1_PRED_STORE-NEXT:    [[COND:%.*]] = icmp slt i64 [[I_NEXT]], [[N]]
1691; VEC2_INTERL1_PRED_STORE-NEXT:    br i1 [[COND]], label [[FOR_BODY]], label [[FOR_END]], !llvm.loop [[LOOP13:![0-9]+]]
1692; VEC2_INTERL1_PRED_STORE:       for.end:
1693; VEC2_INTERL1_PRED_STORE-NEXT:    ret void
1694;
1695entry:
1696  br label %for.body
1697
1698for.body:
1699  %i = phi i64 [ %i.next, %for.inc ], [ 0, %entry ]
1700  %j = phi float [ %j.next, %for.inc ], [ 0.0, %entry ]
1701  %var0 = getelementptr inbounds float, float* %A, i64 %i
1702  %var1 = load float, float* %var0, align 4
1703  %var2 = fcmp fast oeq float %var1, 0.0
1704  br i1 %var2, label %if.pred, label %for.inc
1705
1706if.pred:
1707  store float %j, float* %var0, align 4
1708  br label %for.inc
1709
1710for.inc:
1711  %i.next = add nuw nsw i64 %i, 1
1712  %j.next = fadd fast float %j, 1.0
1713  %cond = icmp slt i64 %i.next, %N
1714  br i1 %cond, label %for.body, label %for.end
1715
1716for.end:
1717  ret void
1718}
1719