; RUN: opt < %s -passes=loop-vectorize -enable-vplan-native-path -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
; REQUIRES: asserts

; Verify that outer loops annotated only with the expected explicit
; vectorization hints are collected for vectorization instead of inner loops.

; Root C/C++ source code for all the test cases
; void foo(int *a, int *b, int N, int M)
; {
;   int i, j;
; #pragma clang loop vectorize(enable)
;   for (i = 0; i < N; i++) {
;     for (j = 0; j < M; j++) {
;       a[i*M+j] = b[i*M+j] * b[i*M+j];
;     }
;   }
; }

; Case 1: Annotated outer loop WITH vector width information must be collected.

; CHECK-LABEL: vector_width
; CHECK: LV: Loop hints: force=enabled width=4 interleave=0
; CHECK: LV: We can vectorize this outer loop!
; CHECK: LV: Using user VF 4 to build VPlans.
; CHECK-NOT: LV: Loop hints: force=?
; CHECK-NOT: LV: Found a loop: inner.body

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @vector_width(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  %cmp230 = icmp sgt i32 %M, 0
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %M to i64
  %wide.trip.count38 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  ; The outer-loop latch carries the hints in !6 (vectorize.width=4, vectorize.enable=true).
  br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !6

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Case 2: Annotated outer loop WITHOUT vector width information must be collected.

; CHECK-LABEL: case2
; CHECK: LV: Loop hints: force=enabled width=0 interleave=0
; CHECK: LV: We can vectorize this outer loop!
; CHECK: LV: Using VF 1 to build VPlans.

define void @case2(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  %cmp230 = icmp sgt i32 %M, 0
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %M to i64
  %wide.trip.count38 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  ; The outer-loop latch carries the hints in !9 (vectorize.enable=true only, no width).
  br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !9

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Case 3: Annotated outer loop WITH vector width and interleave information
; doesn't have to be collected.

; CHECK-LABEL: case3
; CHECK-NOT: LV: Loop hints: force=enabled
; CHECK-NOT: LV: We can vectorize this outer loop!
; CHECK: LV: Loop hints: force=?
; CHECK: LV: Found a loop: inner.body

define void @case3(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  %cmp230 = icmp sgt i32 %M, 0
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %M to i64
  %wide.trip.count38 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  ; The outer-loop latch carries the hints in !11 (vectorize.width=4, interleave.count=2, vectorize.enable=true).
  br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !11

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Case 4: Outer loop without any explicit vectorization annotation doesn't have
; to be collected.

; CHECK-LABEL: case4
; CHECK-NOT: LV: Loop hints: force=enabled
; CHECK-NOT: LV: We can vectorize this outer loop!
; CHECK: LV: Loop hints: force=?
; CHECK: LV: Found a loop: inner.body

define void @case4(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  %cmp230 = icmp sgt i32 %M, 0
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %M to i64
  %wide.trip.count38 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  ; No !llvm.loop metadata is attached to this latch: the outer loop is unannotated.
  br i1 %exitcond39, label %for.end15, label %outer.body

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 6.0.0"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
; Case 1
!6 = distinct !{!6, !7, !8}
!7 = !{!"llvm.loop.vectorize.width", i32 4}
!8 = !{!"llvm.loop.vectorize.enable", i1 true}
; Case 2
!9 = distinct !{!9, !8}
; Case 3
!10 = !{!"llvm.loop.interleave.count", i32 2}
!11 = distinct !{!11, !7, !10, !8}