; RUN: opt < %s -passes=loop-vectorize -enable-vplan-native-path -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
; REQUIRES: asserts

; Verify that LV bails out on explicit vectorization outer loops that contain
; divergent inner loops.
;
; The command line above merges stderr into stdout so that the debug messages
; printed by the vectorizer are visible to the checker; the debug-only option
; is why an assertions-enabled build is required.

; Root C/C++ source code for all the test cases
; void foo(int *a, int *b, int N, int M)
; {
;   int i, j;
; #pragma clang loop vectorize(enable) vectorize_width(8)
;   for (i = 0; i < N; i++) {
;     // Tested inner loop. It will be replaced per test.
;     for (j = 0; j < M; j++) {
;       a[i*M+j] = b[i*M+j] * b[i*M+j];
;     }
;   }
; }

; Case 1 (for (j = i; j < M; j++)): Inner loop with divergent IV start.
;
; The inner induction variable is seeded from the outer induction variable,
; so its initial value differs from one outer iteration to the next.

; CHECK-LABEL: iv_start
; CHECK: LV: Not vectorizing: Outer loop contains divergent loops.
; CHECK: LV: Not vectorizing: Unsupported outer loop.

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

define void @iv_start(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  ; Skip everything when the outer loop would run zero times (N <= 0).
  %cmp33 = icmp sgt i32 %N, 0
  br i1 %cmp33, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %M to i64
  %wide.trip.count41 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  %indvars.iv38 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next39, %outer.inc ]
  ; Inner-loop guard (i < M) depends on the outer IV.
  %cmp231 = icmp slt i64 %indvars.iv38, %0
  br i1 %cmp231, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  ; Row offset i * M.
  %1 = mul nsw i64 %indvars.iv38, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  ; Divergent start: the inner IV begins at the outer IV rather than at 0.
  %indvars.iv35 = phi i64 [ %indvars.iv38, %inner.ph ], [ %indvars.iv.next36, %inner.body ]
  %2 = add nsw i64 %indvars.iv35, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond = icmp eq i64 %indvars.iv.next36, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  %indvars.iv.next39 = add nuw nsw i64 %indvars.iv38, 1
  %exitcond42 = icmp eq i64 %indvars.iv.next39, %wide.trip.count41
  ; The outer latch carries the explicit-vectorization loop metadata (!6).
  br i1 %exitcond42, label %for.end15, label %outer.body, !llvm.loop !6

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}


; Case 2 (for (j = 0; j < i; j++)): Inner loop with divergent upper-bound.
;
; The inner loop exits when its induction variable reaches the outer induction
; variable, so the trip count differs from one outer iteration to the next.

; CHECK-LABEL: loop_ub
; CHECK: LV: Not vectorizing: Outer loop contains divergent loops.
; CHECK: LV: Not vectorizing: Unsupported outer loop.

define void @loop_ub(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  ; Skip everything when the outer loop would run zero times (N <= 0).
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  %0 = sext i32 %M to i64
  %wide.trip.count41 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  %indvars.iv38 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next39, %outer.inc ]
  ; Inner-loop guard: with i == 0 the inner loop has no iterations.
  %cmp230 = icmp eq i64 %indvars.iv38, 0
  br i1 %cmp230, label %outer.inc, label %inner.ph

inner.ph:                                         ; preds = %outer.body
  ; Row offset i * M.
  %1 = mul nsw i64 %indvars.iv38, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Divergent upper bound: the exit test compares against the outer IV.
  %exitcond = icmp eq i64 %indvars.iv.next, %indvars.iv38
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  %indvars.iv.next39 = add nuw nsw i64 %indvars.iv38, 1
  %exitcond42 = icmp eq i64 %indvars.iv.next39, %wide.trip.count41
  ; The outer latch carries the explicit-vectorization loop metadata (!6).
  br i1 %exitcond42, label %for.end15, label %outer.body, !llvm.loop !6

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Case 3 (for (j = 0; j < M; j+=i)): Inner loop with divergent step.
;
; The inner induction variable is advanced by the outer induction variable,
; so its stride differs from one outer iteration to the next.

; CHECK-LABEL: iv_step
; CHECK: LV: Not vectorizing: Outer loop contains divergent loops.
; CHECK: LV: Not vectorizing: Unsupported outer loop.

define void @iv_step(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  ; Skip everything when the outer loop would run zero times (N <= 0).
  %cmp33 = icmp sgt i32 %N, 0
  br i1 %cmp33, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  ; Inner-loop guard (M > 0) is computed once here; it does not use the outer IV.
  %cmp231 = icmp sgt i32 %M, 0
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %for.inc14, %outer.ph
  %indvars.iv39 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next40, %for.inc14 ]
  br i1 %cmp231, label %inner.ph, label %for.inc14

inner.ph:                                         ; preds = %outer.body
  ; Row offset i * M.
  %1 = mul nsw i64 %indvars.iv39, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.ph, %inner.body
  %indvars.iv36 = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next37, %inner.body ]
  %2 = add nsw i64 %indvars.iv36, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  ; Divergent step: the inner IV is incremented by the outer IV.
  %indvars.iv.next37 = add nuw nsw i64 %indvars.iv36, %indvars.iv39
  %cmp2 = icmp slt i64 %indvars.iv.next37, %0
  br i1 %cmp2, label %inner.body, label %for.inc14

for.inc14:                                        ; preds = %inner.body, %outer.body
  %indvars.iv.next40 = add nuw nsw i64 %indvars.iv39, 1
  %exitcond = icmp eq i64 %indvars.iv.next40, %wide.trip.count
  ; The outer latch carries the explicit-vectorization loop metadata (!6).
  br i1 %exitcond, label %for.end15, label %outer.body, !llvm.loop !6

for.end15:                                        ; preds = %for.inc14, %entry
  ret void
}

!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 6.0.0"}
; TBAA nodes referenced by the loads and stores above.
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
; Loop metadata shared by the three outer loops: vectorization enabled with a
; requested width of 8, mirroring the pragma in the C source above.
!6 = distinct !{!6, !7, !8}
!7 = !{!"llvm.loop.vectorize.width", i32 8}
!8 = !{!"llvm.loop.vectorize.enable", i1 true}