; Regression test for loop vectorization of floating-point induction
; variables: each loop below carries a float phi that is advanced by a
; loop-invariant 'fast' fadd/fsub step and stored to memory every iteration.
; The three invocations force (vector width, interleave count) to (4, 1),
; (4, 2) and (1, 2); each one is verified under its own check prefix.
;
; NOTE(review): attribute groups #0 and #1 are referenced by the function
; definitions below, but no 'attributes #N = { ... }' line is visible in this
; file - confirm whether definitions are expected or the references are
; leftovers from the original compiler output.

; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL1 %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=4 -dce -instcombine -S | FileCheck --check-prefix VEC4_INTERL2 %s
; RUN: opt < %s -loop-vectorize -force-vector-interleave=2 -force-vector-width=1 -dce -instcombine -S | FileCheck --check-prefix VEC1_INTERL2 %s

; Width 4, interleave 1: the scalar start (init - fp_inc * index) is splatted
; and the per-lane offsets fp_inc * <0, 1, 2, 3> are subtracted from it.
; VEC4_INTERL1-LABEL: @fp_iv_loop1(
; VEC4_INTERL1: %[[FP_INC:.*]] = load float, float* @fp_inc
; VEC4_INTERL1: vector.body:
; VEC4_INTERL1: %[[FP_INDEX:.*]] = sitofp i64 {{.*}} to float
; VEC4_INTERL1: %[[VEC_INCR:.*]] = fmul fast float {{.*}}, %[[FP_INDEX]]
; VEC4_INTERL1: %[[FP_OFFSET_IDX:.*]] = fsub fast float %init, %[[VEC_INCR]]
; VEC4_INTERL1: %[[BRCT_INSERT:.*]] = insertelement <4 x float> undef, float %[[FP_OFFSET_IDX]], i32 0
; VEC4_INTERL1-NEXT: %[[BRCT_SPLAT:.*]] = shufflevector <4 x float> %[[BRCT_INSERT]], <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1: %[[BRCT_INSERT:.*]] = insertelement {{.*}} %[[FP_INC]]
; VEC4_INTERL1-NEXT: %[[FP_INC_BCST:.*]] = shufflevector <4 x float> %[[BRCT_INSERT]], {{.*}} zeroinitializer
; VEC4_INTERL1: %[[VSTEP:.*]] = fmul fast <4 x float> %[[FP_INC_BCST]], <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL1-NEXT: %[[VEC_INDUCTION:.*]] = fsub fast <4 x float> %[[BRCT_SPLAT]], %[[VSTEP]]
; VEC4_INTERL1: store <4 x float> %[[VEC_INDUCTION]]

; Width 4, interleave 2: two vector inductions are expected, with lane offsets
; <0, 1, 2, 3> and <4, 5, 6, 7>, each followed by its own vector store.
; VEC4_INTERL2-LABEL: @fp_iv_loop1(
; VEC4_INTERL2: %[[FP_INC:.*]] = load float, float* @fp_inc
; VEC4_INTERL2: vector.body:
; VEC4_INTERL2: %[[INDEX:.*]] = sitofp i64 {{.*}} to float
; VEC4_INTERL2: %[[VEC_INCR:.*]] = fmul fast float %{{.*}}, %[[INDEX]]
; VEC4_INTERL2: fsub fast float %init, %[[VEC_INCR]]
; VEC4_INTERL2: %[[VSTEP1:.*]] = fmul fast <4 x float> %{{.*}}, <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00>
; VEC4_INTERL2-NEXT: %[[VEC_INDUCTION1:.*]] = fsub fast <4 x float> {{.*}}, %[[VSTEP1]]
; VEC4_INTERL2: %[[VSTEP2:.*]] = fmul fast <4 x float> %{{.*}}, <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00>
; VEC4_INTERL2-NEXT: %[[VEC_INDUCTION2:.*]] = fsub fast <4 x float> {{.*}}, %[[VSTEP2]]
; VEC4_INTERL2: store <4 x float> %[[VEC_INDUCTION1]]
; VEC4_INTERL2: store <4 x float> %[[VEC_INDUCTION2]]

; Width 1, interleave 2: the induction stays scalar; the second copy is the
; first one minus fp_inc, and both values are stored.
; VEC1_INTERL2-LABEL: @fp_iv_loop1(
; VEC1_INTERL2: %[[FP_INC:.*]] = load float, float* @fp_inc
; VEC1_INTERL2: vector.body:
; VEC1_INTERL2: %[[INDEX:.*]] = sitofp i64 {{.*}} to float
; VEC1_INTERL2: %[[STEP:.*]] = fmul fast float %{{.*}}, %[[INDEX]]
; VEC1_INTERL2: %[[FP_OFFSET_IDX:.*]] = fsub fast float %init, %[[STEP]]
; VEC1_INTERL2: %[[SCALAR_INDUCTION2:.*]] = fsub fast float %[[FP_OFFSET_IDX]], %[[FP_INC]]
; VEC1_INTERL2: store float %[[FP_OFFSET_IDX]]
; VEC1_INTERL2: store float %[[SCALAR_INDUCTION2]]

; Loop-invariant step used by fp_iv_loop1 and fp_iv_loop3.
@fp_inc = common global float 0.000000e+00, align 4

; Case 1: start value is a function argument, step is loaded from a global and
; subtracted on every iteration.
;void fp_iv_loop1(float init, float * __restrict__ A, int N) {
;  float x = init;
;  for (int i=0; i < N; ++i) {
;    A[i] = x;
;    x -= fp_inc;
;  }
;}

define void @fp_iv_loop1(float %init, float* noalias nocapture %A, i32 %N) #1 {
entry:
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  ; The step is loaded once, outside the loop.
  %fpinc = load float, float* @fp_inc, align 4
  br label %for.body

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  ; Floating-point induction: x = init, x -= fpinc.
  %x.05 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.05, float* %arrayidx, align 4
  %add = fsub fast float %x.05, %fpinc
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Case 2: start value is a function argument, step is the constant 0.5.
;void fp_iv_loop2(float init, float * __restrict__ A, int N) {
;  float x = init;
;  for (int i=0; i < N; ++i) {
;    A[i] = x;
;    x += 0.5;
;  }
;}

; VEC4_INTERL1-LABEL: @fp_iv_loop2(
; VEC4_INTERL1: vector.body
; VEC4_INTERL1: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
; VEC4_INTERL1: sitofp i64 %[[index]] to float
; VEC4_INTERL1: %[[VAR1:.*]] = fmul fast float {{.*}}, 5.000000e-01
; VEC4_INTERL1: %[[VAR2:.*]] = fadd fast float %[[VAR1]]
; VEC4_INTERL1: insertelement <4 x float> undef, float %[[VAR2]], i32 0
; VEC4_INTERL1: shufflevector <4 x float> {{.*}}, <4 x float> undef, <4 x i32> zeroinitializer
; VEC4_INTERL1: fadd fast <4 x float> {{.*}}, <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
; VEC4_INTERL1: store <4 x float>

define void @fp_iv_loop2(float %init, float* noalias nocapture %A, i32 %N) #0 {
entry:
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  ; Floating-point induction: x = init, x += 0.5.
  %x.06 = phi float [ %conv1, %for.body ], [ %init, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.06, float* %arrayidx, align 4
  %conv1 = fadd fast float %x.06, 5.000000e-01
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Case 3: two floating-point inductions in one loop - x (argument start, step
; loaded from @fp_inc) and y (constant start 0.1, constant step -0.5). The
; checks below match the constant -0.5 step.
;void fp_iv_loop3(float init, float * __restrict__ A, float * __restrict__ B, float * __restrict__ C, int N) {
;  int i = 0;
;  float x = init;
;  float y = 0.1;
;  for (; i < N; ++i) {
;    A[i] = x;
;    x += fp_inc;
;    y -= 0.5;
;    B[i] = x + y;
;    C[i] = y;
;  }
;}

; VEC4_INTERL1-LABEL: @fp_iv_loop3(
; VEC4_INTERL1: vector.body
; VEC4_INTERL1: %[[index:.*]] = phi i64 [ 0, %vector.ph ]
; VEC4_INTERL1: sitofp i64 %[[index]] to float
; VEC4_INTERL1: %[[VAR1:.*]] = fmul fast float {{.*}}, -5.000000e-01
; VEC4_INTERL1: fadd fast float %[[VAR1]]
; VEC4_INTERL1: fadd fast <4 x float> {{.*}}, <float -5.000000e-01, float -1.000000e+00, float -1.500000e+00, float -2.000000e+00>
; VEC4_INTERL1: store <4 x float>

define void @fp_iv_loop3(float %init, float* noalias nocapture %A, float* noalias nocapture %B, float* noalias nocapture %C, i32 %N) #1 {
entry:
  %cmp9 = icmp sgt i32 %N, 0
  br i1 %cmp9, label %for.body.lr.ph, label %for.end

for.body.lr.ph:                                   ; preds = %entry
  ; Step of the x induction, loaded once outside the loop.
  %0 = load float, float* @fp_inc, align 4
  br label %for.body

for.body:                                         ; preds = %for.body, %for.body.lr.ph
  %indvars.iv = phi i64 [ 0, %for.body.lr.ph ], [ %indvars.iv.next, %for.body ]
  ; y induction; the hex constant is the 0.1 start value from the C source.
  %y.012 = phi float [ 0x3FB99999A0000000, %for.body.lr.ph ], [ %conv1, %for.body ]
  ; x induction: x = init, x += fp_inc.
  %x.011 = phi float [ %init, %for.body.lr.ph ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.011, float* %arrayidx, align 4
  %add = fadd fast float %x.011, %0
  %conv1 = fadd fast float %y.012, -5.000000e-01
  ; B[i] receives the sum of the already-updated x and y.
  %add2 = fadd fast float %conv1, %add
  %arrayidx4 = getelementptr inbounds float, float* %B, i64 %indvars.iv
  store float %add2, float* %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds float, float* %C, i64 %indvars.iv
  store float %conv1, float* %arrayidx6, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}

; Start and step values are constants. There is no 'fmul' operation in this case
;void fp_iv_loop4(float * __restrict__ A, int N) {
;  float x = 1.0;
;  for (int i=0; i < N; ++i) {
;    A[i] = x;
;    x += 0.5;
;  }
;}

; VEC4_INTERL1-LABEL: @fp_iv_loop4(
; VEC4_INTERL1: vector.body
; VEC4_INTERL1-NOT: fmul fast <4 x float>
; VEC4_INTERL1: %[[induction:.*]] = fadd fast <4 x float> %{{.*}}, <float 0.000000e+00, float 5.000000e-01, float 1.000000e+00, float 1.500000e+00>
; VEC4_INTERL1: store <4 x float> %[[induction]]

define void @fp_iv_loop4(float* noalias nocapture %A, i32 %N) {
entry:
  %cmp4 = icmp sgt i32 %N, 0
  br i1 %cmp4, label %for.body.preheader, label %for.end

for.body.preheader:                               ; preds = %entry
  br label %for.body

for.body:                                         ; preds = %for.body.preheader, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %for.body.preheader ]
  ; Floating-point induction with constant start 1.0 and constant step 0.5.
  %x.06 = phi float [ %conv1, %for.body ], [ 1.000000e+00, %for.body.preheader ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  store float %x.06, float* %arrayidx, align 4
  %conv1 = fadd fast float %x.06, 5.000000e-01
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %N
  br i1 %exitcond, label %for.end.loopexit, label %for.body

for.end.loopexit:                                 ; preds = %for.body
  br label %for.end

for.end:                                          ; preds = %for.end.loopexit, %entry
  ret void
}