; RUN: opt -loop-vectorize -force-vector-width=4 -enable-vplan-native-path -S %s | FileCheck %s

; Vectorize an explicitly marked outer loop using the VPlan native path. The
; inner loop contains a simple double add reduction. The IR was compiled and
; then modified by hand from the following C code:
;
; void inner_loop_reduction(const double* restrict in_a, const double* restrict in_b, double* restrict out)
; {
;   #pragma clang loop vectorize(enable)
;   for (int i = 0; i < 1000; ++i) {
;     double a = in_a[i];
;     double b = in_b[i];
;     for (int j = 0; j < 10000; ++j) {
;       a = a + b;
;     }
;     out[i] = a;
;   }
; }
;
; The expectations below are matched against the vectorized output (VF = 4,
; forced on the command line). They are interleaved with the function so that
; the label directive can anchor on the function definition.
define void @inner_loop_reduction(double* noalias nocapture readonly %a.in, double* noalias nocapture readonly %b.in, double* noalias nocapture %c.out) {
; CHECK-LABEL: @inner_loop_reduction(

; Vectorized outer-loop header: a scalar i64 index plus a <4 x i64> vector
; index, followed by wide gathers of a[i] and b[i] through vector GEPs.
; CHECK: vector.body:
; CHECK-NEXT: %[[FOR1_INDEX:.*]] = phi i64 [ 0, %[[LABEL_PR:.*]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH:.*]] ]
; CHECK: %[[VEC_INDEX:.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, %[[LABEL_PR]] ], [ %{{.*}}, %[[LABEL_FOR1_LATCH]] ]
; CHECK-NEXT: %[[A_PTR:.*]] = getelementptr inbounds double, double* %a.in, <4 x i64> %[[VEC_INDEX]]
; CHECK-NEXT: %[[MASKED_GATHER1:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %[[A_PTR]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
; CHECK-NEXT: %[[B_PTR:.*]] = getelementptr inbounds double, double* %b.in, <4 x i64> %[[VEC_INDEX]]
; CHECK-NEXT: %[[MASKED_GATHER2:.*]] = call <4 x double> @llvm.masked.gather.v4f64.v4p0f64(<4 x double*> %[[B_PTR]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>, <4 x double> undef)
; CHECK-NEXT: br label %[[FOR2_HEADER:.*]]

; Widened inner loop: the induction variable and the fadd reduction are both
; carried as 4-wide vector phis; the exit test uses lane 0 of the vector compare.
; CHECK: [[FOR2_HEADER]]:
; CHECK-NEXT: %[[FOR2_INDEX:.*]] = phi <4 x i32> [ zeroinitializer, %vector.body ], [ %[[FOR2_INDEX_NEXT:.*]], %[[FOR2_HEADER]] ]
; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[MASKED_GATHER1]], %vector.body ], [ %[[REDUCTION_NEXT:.*]], %[[FOR2_HEADER]] ]
; CHECK-NEXT: %[[REDUCTION_NEXT]] = fadd <4 x double> %[[MASKED_GATHER2]], %[[REDUCTION]]
; CHECK-NEXT: %[[FOR2_INDEX_NEXT]] = add nuw nsw <4 x i32> %[[FOR2_INDEX]], <i32 1, i32 1, i32 1, i32 1>
; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i32> %[[FOR2_INDEX_NEXT]], <i32 10000, i32 10000, i32 10000, i32 10000>
; CHECK-NEXT: %[[EXIT_COND:.*]] = extractelement <4 x i1> %[[VEC_PTR]], i32 0
; CHECK-NEXT: br i1 %[[EXIT_COND]], label %[[FOR1_LATCH:.*]], label %{{.*}}

; Vectorized outer-loop latch: scatter the reduced values to c.out and advance
; the scalar index by 4. Note that REDUCTION, VEC_PTR and EXIT_COND are
; intentionally re-bound here to the latch-local values.
; CHECK: [[FOR1_LATCH]]:
; CHECK-NEXT: %[[REDUCTION:.*]] = phi <4 x double> [ %[[REDUCTION_NEXT]], %[[FOR2_HEADER]] ]
; CHECK-NEXT: %[[C_PTR:.*]] = getelementptr inbounds double, double* %c.out, <4 x i64> %[[VEC_INDEX]]
; CHECK-NEXT: call void @llvm.masked.scatter.v4f64.v4p0f64(<4 x double> %[[REDUCTION]], <4 x double*> %[[C_PTR]], i32 8, <4 x i1> <i1 true, i1 true, i1 true, i1 true>)
; CHECK-NEXT: %[[VEC_INDEX_NEXT:.*]] = add nuw nsw <4 x i64> %[[VEC_INDEX]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT: %[[VEC_PTR:.*]] = icmp eq <4 x i64> %[[VEC_INDEX_NEXT]], <i64 1000, i64 1000, i64 1000, i64 1000>
; CHECK-NEXT: %[[FOR1_INDEX_NEXT:.*]] = add nuw i64 %[[FOR1_INDEX]], 4
; CHECK-NEXT: %{{.*}} = add <4 x i64> %[[VEC_INDEX]], <i64 4, i64 4, i64 4, i64 4>
; CHECK-NEXT: %[[EXIT_COND:.*]] = icmp eq i64 %[[FOR1_INDEX_NEXT]], 1000
; CHECK-NEXT: br i1 %[[EXIT_COND]], label %{{.*}}, label %vector.body

entry:
  br label %for1.header

; Outer loop header (i): load a[i] and b[i], then enter the inner loop.
for1.header:                                      ; preds = %entry, %for1.latch
  %indvar1 = phi i64 [ 0, %entry ], [ %indvar11, %for1.latch ]
  %a.ptr = getelementptr inbounds double, double* %a.in, i64 %indvar1
  %a = load double, double* %a.ptr, align 8
  %b.ptr = getelementptr inbounds double, double* %b.in, i64 %indvar1
  %b = load double, double* %b.ptr, align 8
  br label %for2.header

; Inner loop (j): single-block loop that adds %b into the running value,
; which starts at %a, for 10000 iterations.
for2.header:                                      ; preds = %for1.header, %for2.header
  %indvar2 = phi i32 [ 0, %for1.header ], [ %indvar21, %for2.header ]
  %a.reduction = phi double [ %a, %for1.header ], [ %a.reduction1, %for2.header ]
  %a.reduction1 = fadd double %b, %a.reduction
  %indvar21 = add nuw nsw i32 %indvar2, 1
  %for2.cond = icmp eq i32 %indvar21, 10000
  br i1 %for2.cond, label %for1.latch, label %for2.header

; Outer loop latch: store the reduced value to c.out[i] and step i. The
; back-edge carries the loop metadata that requests vectorization.
for1.latch:                                       ; preds = %for2.header
  %c.ptr = getelementptr inbounds double, double* %c.out, i64 %indvar1
  store double %a.reduction1, double* %c.ptr, align 8
  %indvar11 = add nuw nsw i64 %indvar1, 1
  %for1.cond = icmp eq i64 %indvar11, 1000
  br i1 %for1.cond, label %exit, label %for1.header, !llvm.loop !0

exit:                                             ; preds = %for1.latch
  ret void
}

; Loop metadata attached to the outer loop's back-edge: explicitly enables
; vectorization (the IR counterpart of the pragma in the C source above).
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.enable", i1 true}