1; RUN: opt < %s -passes=loop-vectorize -enable-vplan-native-path -pass-remarks-analysis=loop-vectorize -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
2; REQUIRES: asserts
3
4; Verify that LV bails out on explicit vectorization outer loops that contain
5; divergent inner loops.
6
7; Root C/C++ source code for all the test cases
8; void foo(int *a, int *b, int N, int M)
9; {
10;   int i, j;
11; #pragma clang loop vectorize(enable) vectorize_width(8)
12;   for (i = 0; i < N; i++) {
13;     // Tested inner loop. It will be replaced per test.
14;     for (j = 0; j < M; j++) {
15;       a[i*M+j] = b[i*M+j] * b[i*M+j];
16;     }
17;   }
18; }
19
20; Case 1 (for (j = i; j < M; j++)): Inner loop with divergent IV start.
21
22; CHECK-LABEL: iv_start
23; CHECK: LV: Not vectorizing: Outer loop contains divergent loops.
24; CHECK: LV: Not vectorizing: Unsupported outer loop.
25
; Module data layout shared by all test functions below. Per the LLVM LangRef
; layout syntax: little-endian (e), ELF name mangling (m:e), 64-bit aligned
; i64, 128-bit aligned f80, native integer widths 8/16/32/64, and a 128-bit
; natural stack alignment (S128).
26target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
27
; @iv_start: outer loop over i in [0, N) with an inner loop whose induction
; variable starts at the current outer induction value (j = i). The inner IV
; start therefore differs between outer-loop iterations.
;   %a - destination array (written at index i*M + j)
;   %b - source array (read at index i*M + j)
;   %N - outer-loop trip count
;   %M - inner-loop upper bound
28define void @iv_start(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
29entry:
  ; Skip the whole loop nest when N <= 0.
30  %cmp33 = icmp sgt i32 %N, 0
31  br i1 %cmp33, label %outer.ph, label %for.end15
32
33outer.ph:                                   ; preds = %entry
  ; Loop-invariant widened copies of M (sign- and zero-extended) and of N.
34  %0 = sext i32 %M to i64
35  %wide.trip.count = zext i32 %M to i64
36  %wide.trip.count41 = zext i32 %N to i64
37  br label %outer.body
38
39outer.body:                                 ; preds = %outer.inc, %outer.ph
  ; %indvars.iv38 is the outer induction variable i.
40  %indvars.iv38 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next39, %outer.inc ]
  ; Inner-loop guard: only enter the inner loop when i < M.
41  %cmp231 = icmp slt i64 %indvars.iv38, %0
42  br i1 %cmp231, label %inner.ph, label %outer.inc
43
44inner.ph:                                   ; preds = %outer.body
  ; Row offset i*M, invariant in the inner loop.
45  %1 = mul nsw i64 %indvars.iv38, %0
46  br label %inner.body
47
48inner.body:                                 ; preds = %inner.body, %inner.ph
  ; Inner induction variable j: its initial value is the outer IV i, which is
  ; the property this test case exercises.
49  %indvars.iv35 = phi i64 [ %indvars.iv38, %inner.ph ], [ %indvars.iv.next36, %inner.body ]
  ; a[i*M + j] = b[i*M + j] * b[i*M + j]
50  %2 = add nsw i64 %indvars.iv35, %1
51  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
52  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
53  %mul8 = mul nsw i32 %3, %3
54  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
55  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  ; j += 1; leave the inner loop once j reaches the zero-extended M.
56  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
57  %exitcond = icmp eq i64 %indvars.iv.next36, %wide.trip.count
58  br i1 %exitcond, label %outer.inc, label %inner.body
59
60outer.inc:                                  ; preds = %inner.body, %outer.body
  ; Outer latch: i += 1; exit once i reaches N. The !llvm.loop !6 attachment
  ; carries the vectorize.enable / vectorize.width(8) hints for the outer loop.
61  %indvars.iv.next39 = add nuw nsw i64 %indvars.iv38, 1
62  %exitcond42 = icmp eq i64 %indvars.iv.next39, %wide.trip.count41
63  br i1 %exitcond42, label %for.end15, label %outer.body, !llvm.loop !6
64
65for.end15:                                  ; preds = %outer.inc, %entry
66  ret void
67}
68
69
70; Case 2 (for (j = 0; j < i; j++)): Inner loop with divergent upper-bound.
71
72; CHECK-LABEL: loop_ub
73; CHECK: LV: Not vectorizing: Outer loop contains divergent loops.
74; CHECK: LV: Not vectorizing: Unsupported outer loop.
75
; @loop_ub: outer loop over i in [0, N) with an inner loop that runs j from 0
; up to the current outer induction value (j < i). The inner-loop exit bound
; therefore differs between outer-loop iterations.
;   %a - destination array (written at index i*M + j)
;   %b - source array (read at index i*M + j)
;   %N - outer-loop trip count
;   %M - row stride used when forming the array index
76define void @loop_ub(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
77entry:
  ; Skip the whole loop nest when N <= 0.
78  %cmp32 = icmp sgt i32 %N, 0
79  br i1 %cmp32, label %outer.ph, label %for.end15
80
81outer.ph:                                   ; preds = %entry
  ; Loop-invariant widened copies of M (sign-extended) and N (zero-extended).
82  %0 = sext i32 %M to i64
83  %wide.trip.count41 = zext i32 %N to i64
84  br label %outer.body
85
86outer.body:                                 ; preds = %outer.inc, %outer.ph
  ; %indvars.iv38 is the outer induction variable i.
87  %indvars.iv38 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next39, %outer.inc ]
  ; Inner-loop guard: when i == 0 the inner loop is skipped entirely.
88  %cmp230 = icmp eq i64 %indvars.iv38, 0
89  br i1 %cmp230, label %outer.inc, label %inner.ph
90
91inner.ph:                                   ; preds = %outer.body
  ; Row offset i*M, invariant in the inner loop.
92  %1 = mul nsw i64 %indvars.iv38, %0
93  br label %inner.body
94
95inner.body:                                 ; preds = %inner.body, %inner.ph
  ; Inner induction variable j, starting at 0.
96  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  ; a[i*M + j] = b[i*M + j] * b[i*M + j]
97  %2 = add nsw i64 %indvars.iv, %1
98  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
99  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
100  %mul8 = mul nsw i32 %3, %3
101  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
102  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  ; j += 1; the exit test compares against the outer IV i, which is the
  ; property this test case exercises.
103  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
104  %exitcond = icmp eq i64 %indvars.iv.next, %indvars.iv38
105  br i1 %exitcond, label %outer.inc, label %inner.body
106
107outer.inc:                                  ; preds = %inner.body, %outer.body
  ; Outer latch: i += 1; exit once i reaches N. The !llvm.loop !6 attachment
  ; carries the vectorize.enable / vectorize.width(8) hints for the outer loop.
108  %indvars.iv.next39 = add nuw nsw i64 %indvars.iv38, 1
109  %exitcond42 = icmp eq i64 %indvars.iv.next39, %wide.trip.count41
110  br i1 %exitcond42, label %for.end15, label %outer.body, !llvm.loop !6
111
112for.end15:                                  ; preds = %outer.inc, %entry
113  ret void
114}
115
116; Case 3 (for (j = 0; j < M; j+=i)): Inner loop with divergent step.
117
118; CHECK-LABEL: iv_step
119; CHECK: LV: Not vectorizing: Outer loop contains divergent loops.
120; CHECK: LV: Not vectorizing: Unsupported outer loop.
121
; @iv_step: outer loop over i in [0, N) with an inner loop whose induction
; variable advances by the current outer induction value (j += i). The inner
; step therefore differs between outer-loop iterations.
;   %a - destination array (written at index i*M + j)
;   %b - source array (read at index i*M + j)
;   %N - outer-loop trip count
;   %M - inner-loop upper bound
122define void @iv_step(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
123entry:
  ; Skip the whole loop nest when N <= 0.
124  %cmp33 = icmp sgt i32 %N, 0
125  br i1 %cmp33, label %outer.ph, label %for.end15
126
127outer.ph:                                   ; preds = %entry
  ; Loop-invariant values: the inner-loop guard (M > 0), sign-extended M, and
  ; zero-extended N.
128  %cmp231 = icmp sgt i32 %M, 0
129  %0 = sext i32 %M to i64
130  %wide.trip.count = zext i32 %N to i64
131  br label %outer.body
132
133outer.body:                                 ; preds = %for.inc14, %outer.ph
  ; %indvars.iv39 is the outer induction variable i.
134  %indvars.iv39 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next40, %for.inc14 ]
  ; Enter the inner loop only when M > 0 (condition computed in outer.ph).
135  br i1 %cmp231, label %inner.ph, label %for.inc14
136
137inner.ph:                                   ; preds = %outer.body
  ; Row offset i*M, invariant in the inner loop.
138  %1 = mul nsw i64 %indvars.iv39, %0
139  br label %inner.body
140
141inner.body:                                 ; preds = %inner.ph, %inner.body
  ; Inner induction variable j, starting at 0.
142  %indvars.iv36 = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next37, %inner.body ]
  ; a[i*M + j] = b[i*M + j] * b[i*M + j]
143  %2 = add nsw i64 %indvars.iv36, %1
144  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
145  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
146  %mul8 = mul nsw i32 %3, %3
147  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
148  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  ; j += i: the step is the outer IV, which is the property this test case
  ; exercises. Continue while the updated j is still < M.
149  %indvars.iv.next37 = add nuw nsw i64 %indvars.iv36, %indvars.iv39
150  %cmp2 = icmp slt i64 %indvars.iv.next37, %0
151  br i1 %cmp2, label %inner.body, label %for.inc14
152
153for.inc14:                                 ; preds = %inner.body, %outer.body
  ; Outer latch: i += 1; exit once i reaches N. The !llvm.loop !6 attachment
  ; carries the vectorize.enable / vectorize.width(8) hints for the outer loop.
154  %indvars.iv.next40 = add nuw nsw i64 %indvars.iv39, 1
155  %exitcond = icmp eq i64 %indvars.iv.next40, %wide.trip.count
156  br i1 %exitcond, label %for.end15, label %outer.body, !llvm.loop !6
157
158for.end15:                                 ; preds = %for.inc14, %entry
159  ret void
160}
161
; Module-level metadata shared by all three test functions.
162!llvm.module.flags = !{!0}
163!llvm.ident = !{!1}
164
; !0: "wchar_size" module flag; !1: producer identification string.
165!0 = !{i32 1, !"wchar_size", i32 4}
166!1 = !{!"clang version 6.0.0"}
; !2-!5: TBAA access tag and type hierarchy for "int", referenced by the
; !tbaa !2 attachments on the loads and stores above.
167!2 = !{!3, !3, i64 0}
168!3 = !{!"int", !4, i64 0}
169!4 = !{!"omnipotent char", !5, i64 0}
170!5 = !{!"Simple C/C++ TBAA"}
; !6: loop ID attached to each outer-loop latch branch (!llvm.loop !6). It
; bundles the explicit vectorization hints: width 8 (!7) and enable (!8),
; matching the "#pragma clang loop vectorize(enable) vectorize_width(8)" in
; the C source shown at the top of the file.
171!6 = distinct !{!6, !7, !8}
172!7 = !{!"llvm.loop.vectorize.width", i32 8}
173!8 = !{!"llvm.loop.vectorize.enable", i1 true}
174