; RUN: opt < %s -loop-vectorize -enable-vplan-native-path -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
; REQUIRES: asserts
;
; The checks below match the vectorizer's debug trace (-debug-only), which is
; only emitted by assertion-enabled builds; hence the REQUIRES line above.

; Verify that outer loops annotated only with the expected explicit
; vectorization hints are collected for vectorization instead of inner loops.

; Root C/C++ source code for all the test cases
; void foo(int *a, int *b, int N, int M)
; {
;   int i, j;
; #pragma clang loop vectorize(enable)
;   for (i = 0; i < N; i++) {
;     for (j = 0; j < M; j++) {
;       a[i*M+j] = b[i*M+j] * b[i*M+j];
;     }
;   }
; }
;
; All four functions below share the same loop nest body; they differ only in
; the !llvm.loop metadata attached to the outer-loop latch branch in outer.inc.

; Case 1: Annotated outer loop WITH vector width information must be collected.

; CHECK-LABEL: vector_width
; CHECK: LV: Loop hints: force=enabled width=4 unroll=0
; CHECK: LV: We can vectorize this outer loop!
; CHECK: LV: Using user VF 4.
; CHECK-NOT: LV: Loop hints: force=?
; CHECK-NOT: LV: Found a loop: inner.body

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @vector_width(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  %cmp230 = icmp sgt i32 %M, 0
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %M to i64
  %wide.trip.count38 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !6

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Case 2: Annotated outer loop WITHOUT vector width information doesn't have to
; be collected.

; CHECK-LABEL: case2
; CHECK-NOT: LV: Loop hints: force=enabled
; CHECK-NOT: LV: We can vectorize this outer loop!
; CHECK: LV: Loop hints: force=?
; CHECK: LV: Found a loop: inner.body

define void @case2(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  %cmp230 = icmp sgt i32 %M, 0
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %M to i64
  %wide.trip.count38 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !9

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Case 3: Annotated outer loop WITH vector width and interleave information
; doesn't have to be collected.

; CHECK-LABEL: case3
; CHECK-NOT: LV: Loop hints: force=enabled
; CHECK-NOT: LV: We can vectorize this outer loop!
; CHECK: LV: Loop hints: force=?
; CHECK: LV: Found a loop: inner.body

define void @case3(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  %cmp230 = icmp sgt i32 %M, 0
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %M to i64
  %wide.trip.count38 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !11

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Case 4: Outer loop without any explicit vectorization annotation doesn't have
; to be collected.

; CHECK-LABEL: case4
; CHECK-NOT: LV: Loop hints: force=enabled
; CHECK-NOT: LV: We can vectorize this outer loop!
; CHECK: LV: Loop hints: force=?
; CHECK: LV: Found a loop: inner.body

define void @case4(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  %cmp230 = icmp sgt i32 %M, 0
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %M to i64
  %wide.trip.count38 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  br i1 %exitcond39, label %for.end15, label %outer.body

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 6.0.0"}
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
; Case 1: outer loop carries vectorize.width=4 and vectorize.enable=true.
!6 = distinct !{!6, !7, !8}
!7 = !{!"llvm.loop.vectorize.width", i32 4}
!8 = !{!"llvm.loop.vectorize.enable", i1 true}
; Case 2: outer loop carries vectorize.enable=true only (no width).
!9 = distinct !{!9, !8}
; Case 3: outer loop carries width=4, interleave.count=2 and enable=true.
!10 = !{!"llvm.loop.interleave.count", i32 2}
!11 = distinct !{!11, !7, !10, !8}