; RUN: opt -loop-vectorize -scalable-vectorization=on -force-target-instruction-cost=1 -force-target-supports-scalable-vectors -dce -instcombine < %s -S | FileCheck %s

; Each loop below carries !llvm.loop metadata (defined at the end of the file)
; that requests a scalable vectorization width and an interleave count. The
; FileCheck directives match the vectorized induction-variable computation.

; Test that we can add on the induction variable
;   for (long long i = 0; i < n; i++) {
;     a[i] = b[i] + i;
;   }
; with an unroll factor (interleave count) of 2.

define void @add_ind64_unrolled(i64* noalias nocapture %a, i64* noalias nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @add_ind64_unrolled(
; CHECK-NEXT:  entry:
; CHECK:       vector.body:
; CHECK-NEXT:    %[[INDEX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]
; CHECK-NEXT:    %[[STEPVEC:.*]] = call <vscale x 2 x i64> @llvm.experimental.stepvector.nxv2i64()
; CHECK-NEXT:    %[[TMP1:.*]] = insertelement <vscale x 2 x i64> poison, i64 %[[INDEX]], i32 0
; CHECK-NEXT:    %[[IDXSPLT:.*]] = shufflevector <vscale x 2 x i64> %[[TMP1]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT:    %[[VECIND1:.*]] = add <vscale x 2 x i64> %[[IDXSPLT]], %[[STEPVEC]]
; CHECK-NEXT:    %[[VSCALE:.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    %[[EC:.*]] = shl i64 %[[VSCALE]], 1
; CHECK-NEXT:    %[[TMP2:.*]] = insertelement <vscale x 2 x i64> poison, i64 %[[EC]], i32 0
; CHECK-NEXT:    %[[ECSPLT:.*]] = shufflevector <vscale x 2 x i64> %[[TMP2]], <vscale x 2 x i64> poison, <vscale x 2 x i32> zeroinitializer
; CHECK-NEXT:    %[[TMP3:.*]] = add <vscale x 2 x i64> %[[ECSPLT]], %[[STEPVEC]]
; CHECK-NEXT:    %[[VECIND2:.*]] = add <vscale x 2 x i64> %[[IDXSPLT]], %[[TMP3]]
; CHECK:         %[[LOAD1:.*]] = load <vscale x 2 x i64>
; CHECK:         %[[LOAD2:.*]] = load <vscale x 2 x i64>
; CHECK:         %[[STOREVAL1:.*]] = add nsw <vscale x 2 x i64> %[[LOAD1]], %[[VECIND1]]
; CHECK:         %[[STOREVAL2:.*]] = add nsw <vscale x 2 x i64> %[[LOAD2]], %[[VECIND2]]
; CHECK:         store <vscale x 2 x i64> %[[STOREVAL1]]
; CHECK:         store <vscale x 2 x i64> %[[STOREVAL2]]

entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i64, i64* %b, i64 %i.08
  %0 = load i64, i64* %arrayidx, align 8
  %add = add nsw i64 %0, %i.08
  %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 %i.08
  store i64 %add, i64* %arrayidx1, align 8
  %inc = add nuw nsw i64 %i.08, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !0

exit:                                             ; preds = %for.body
  ret void
}


; Same as above, except we test with a vectorisation factor of (1, scalable)

define void @add_ind64_unrolled_nxv1i64(i64* noalias nocapture %a, i64* noalias nocapture readonly %b, i64 %n) {
; CHECK-LABEL: @add_ind64_unrolled_nxv1i64(
; CHECK-NEXT:  entry:
; CHECK:       vector.body:
; CHECK-NEXT:    %[[INDEX:.*]] = phi i64 [ 0, %vector.ph ], [ %{{.*}}, %vector.body ]
; CHECK-NEXT:    %[[STEPVEC:.*]] = call <vscale x 1 x i64> @llvm.experimental.stepvector.nxv1i64()
; CHECK-NEXT:    %[[TMP1:.*]] = insertelement <vscale x 1 x i64> poison, i64 %[[INDEX]], i32 0
; CHECK-NEXT:    %[[IDXSPLT:.*]] = shufflevector <vscale x 1 x i64> %[[TMP1]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT:    %[[VECIND1:.*]] = add <vscale x 1 x i64> %[[IDXSPLT]], %[[STEPVEC]]
; CHECK-NEXT:    %[[EC:.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT:    %[[TMP2:.*]] = insertelement <vscale x 1 x i64> poison, i64 %[[EC]], i32 0
; CHECK-NEXT:    %[[ECSPLT:.*]] = shufflevector <vscale x 1 x i64> %[[TMP2]], <vscale x 1 x i64> poison, <vscale x 1 x i32> zeroinitializer
; CHECK-NEXT:    %[[TMP3:.*]] = add <vscale x 1 x i64> %[[ECSPLT]], %[[STEPVEC]]
; CHECK-NEXT:    %[[VECIND2:.*]] = add <vscale x 1 x i64> %[[IDXSPLT]], %[[TMP3]]
; CHECK:         %[[LOAD1:.*]] = load <vscale x 1 x i64>
; CHECK:         %[[LOAD2:.*]] = load <vscale x 1 x i64>
; CHECK:         %[[STOREVAL1:.*]] = add nsw <vscale x 1 x i64> %[[LOAD1]], %[[VECIND1]]
; CHECK:         %[[STOREVAL2:.*]] = add nsw <vscale x 1 x i64> %[[LOAD2]], %[[VECIND2]]
; CHECK:         store <vscale x 1 x i64> %[[STOREVAL1]]
; CHECK:         store <vscale x 1 x i64> %[[STOREVAL2]]

entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i64, i64* %b, i64 %i.08
  %0 = load i64, i64* %arrayidx, align 8
  %add = add nsw i64 %0, %i.08
  %arrayidx1 = getelementptr inbounds i64, i64* %a, i64 %i.08
  store i64 %add, i64* %arrayidx1, align 8
  %inc = add nuw nsw i64 %i.08, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !9

exit:                                             ; preds = %for.body
  ret void
}


; Test that we can vectorize a separate induction variable (not used for the branch)
;   int r = 0;
;   for (long long i = 0; i < n; i++) {
;     a[i] = r;
;     r += 2;
;   }
; with an unroll factor (interleave count) of 1.


define void @add_unique_ind32(i32* noalias nocapture %a, i64 %n) {
; CHECK-LABEL: @add_unique_ind32(
; CHECK:       vector.ph:
; CHECK:         %[[STEPVEC:.*]] = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
; CHECK-NEXT:    %[[INDINIT:.*]] = shl <vscale x 4 x i32> %[[STEPVEC]], shufflevector (<vscale x 4 x i32> insertelement (<vscale x 4 x i32> poison, i32 1, i32 0), <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT:    %[[VSCALE:.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    %[[INC:.*]] = shl i32 %[[VSCALE]], 3
; CHECK-NEXT:    %[[TMP:.*]] = insertelement <vscale x 4 x i32> poison, i32 %[[INC]], i32 0
; CHECK-NEXT:    %[[VECINC:.*]] = shufflevector <vscale x 4 x i32> %[[TMP]], <vscale x 4 x i32> poison, <vscale x 4 x i32> zeroinitializer
; CHECK:       vector.body:
; CHECK:         %[[VECIND:.*]] = phi <vscale x 4 x i32> [ %[[INDINIT]], %vector.ph ], [ %[[VECINDNXT:.*]], %vector.body ]
; CHECK:         store <vscale x 4 x i32> %[[VECIND]]
; CHECK:         %[[VECINDNXT]] = add <vscale x 4 x i32> %[[VECIND]], %[[VECINC]]
entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %r.07 = phi i32 [ %add, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %i.08
  store i32 %r.07, i32* %arrayidx, align 4
  %add = add nuw nsw i32 %r.07, 2
  %inc = add nuw nsw i64 %i.08, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !6

exit:                                             ; preds = %for.body
  ret void
}


; Test that we can vectorize a separate FP induction variable (not used for the branch)
;   float r = 0;
;   for (long long i = 0; i < n; i++) {
;     a[i] = r;
;     r += 2;
;   }
; with an unroll factor (interleave count) of 1.
;
; Note: the operand of the scalar shl is matched through the captured VSCALE
; variable rather than a literal value number, so the test does not depend on
; how the unnamed values in the output happen to be numbered.

define void @add_unique_indf32(float* noalias nocapture %a, i64 %n) {
; CHECK-LABEL: @add_unique_indf32(
; CHECK:       vector.ph:
; CHECK:         %[[STEPVEC:.*]] = call <vscale x 4 x i32> @llvm.experimental.stepvector.nxv4i32()
; CHECK-NEXT:    %[[TMP1:.*]] = uitofp <vscale x 4 x i32> %[[STEPVEC]] to <vscale x 4 x float>
; CHECK-NEXT:    %[[TMP2:.*]] = fmul <vscale x 4 x float> %[[TMP1]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT:    %[[INDINIT:.*]] = fadd <vscale x 4 x float> %[[TMP2]], zeroinitializer
; CHECK-NEXT:    %[[VSCALE:.*]] = call i32 @llvm.vscale.i32()
; CHECK-NEXT:    %[[TMP3:.*]] = shl i32 %[[VSCALE]], 2
; CHECK-NEXT:    %[[TMP4:.*]] = uitofp i32 %[[TMP3]] to float
; CHECK-NEXT:    %[[INC:.*]] = fmul float %[[TMP4]], 2.000000e+00
; CHECK-NEXT:    %[[TMP5:.*]] = insertelement <vscale x 4 x float> poison, float %[[INC]], i32 0
; CHECK-NEXT:    %[[VECINC:.*]] = shufflevector <vscale x 4 x float> %[[TMP5]], <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer
; CHECK:       vector.body:
; CHECK:         %[[VECIND:.*]] = phi <vscale x 4 x float> [ %[[INDINIT]], %vector.ph ], [ %[[VECINDNXT:.*]], %vector.body ]
; CHECK:         store <vscale x 4 x float> %[[VECIND]]
; CHECK:         %[[VECINDNXT]] = fadd <vscale x 4 x float> %[[VECIND]], %[[VECINC]]

entry:
  br label %for.body

for.body:                                         ; preds = %entry, %for.body
  %i.08 = phi i64 [ %inc, %for.body ], [ 0, %entry ]
  %r.07 = phi float [ %add, %for.body ], [ 0.000000e+00, %entry ]
  %arrayidx = getelementptr inbounds float, float* %a, i64 %i.08
  store float %r.07, float* %arrayidx, align 4
  %add = fadd float %r.07, 2.000000e+00
  %inc = add nuw nsw i64 %i.08, 1
  %exitcond.not = icmp eq i64 %inc, %n
  br i1 %exitcond.not, label %exit, label %for.body, !llvm.loop !6

exit:                                             ; preds = %for.body
  ret void
}

; Loop metadata used above:
;   !0 - vectorize width 2 (scalable), interleave count 2
;   !6 - vectorize width 4 (scalable), interleave count 1
;   !9 - vectorize width 1 (scalable), interleave count 2
!0 = distinct !{!0, !1, !2, !3, !4, !5}
!1 = !{!"llvm.loop.mustprogress"}
!2 = !{!"llvm.loop.vectorize.width", i32 2}
!3 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}
!4 = !{!"llvm.loop.interleave.count", i32 2}
!5 = !{!"llvm.loop.vectorize.enable", i1 true}
!6 = distinct !{!6, !1, !7, !3, !8, !5}
!7 = !{!"llvm.loop.vectorize.width", i32 4}
!8 = !{!"llvm.loop.interleave.count", i32 1}
!9 = distinct !{!9, !1, !10, !3, !4, !5}
!10 = !{!"llvm.loop.vectorize.width", i32 1}