; RUN: opt < %s -loop-vectorize -enable-vplan-native-path -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
; REQUIRES: asserts

; Verify that outer loops annotated only with the expected explicit
; vectorization hints are collected for vectorization instead of inner loops.

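; Root C/C++ source code for all the test cases. The pragma below is only the
; base form; the hints actually attached to the outer loop differ per case and
; are given by the !llvm.loop metadata at the end of this file.
; void foo(int *a, int *b, int N, int M)
; {
;   int i, j;
; #pragma clang loop vectorize(enable)
;   for (i = 0; i < N; i++) {
;     for (j = 0; j < M; j++) {
;       a[i*M+j] = b[i*M+j] * b[i*M+j];
;     }
;   }
; }
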
; Case 1: Annotated outer loop WITH vector width information must be collected.

; CHECK-LABEL: vector_width
; CHECK: LV: Loop hints: force=enabled width=4 unroll=0
; CHECK: LV: We can vectorize this outer loop!
; CHECK: LV: Using user VF 4.
; CHECK-NOT: LV: Loop hints: force=?
; CHECK-NOT: LV: Found a loop: inner.body

; Little-endian layout with ELF name mangling, native integer widths of
; 8/16/32/64 bits and a 128-bit natural stack alignment.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; Case 1 input: doubly nested loop computing a[i*M+j] = b[i*M+j] * b[i*M+j].
; The outer-loop latch branch (in %outer.inc) carries !llvm.loop !6.
define void @vector_width(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  ; Bypass both loops entirely when N <= 0.
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  ; Values invariant in both loops: the inner-loop guard (M > 0), M
  ; sign-extended for the row offset, and the zero-extended trip counts.
  %cmp230 = icmp sgt i32 %M, 0
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %M to i64
  %wide.trip.count38 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  ; %indvars.iv35 is the outer induction variable i.
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  ; %1 = i * M, the row offset shared by every inner iteration.
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  ; %indvars.iv is the inner induction variable j; %2 = i*M + j.
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  ; Outer latch: the loop hints under test are attached to this branch.
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !6

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Case 2: Annotated outer loop WITHOUT vector width information doesn't have to
; be collected.

; CHECK-LABEL: case2
; CHECK-NOT: LV: Loop hints: force=enabled
; CHECK-NOT: LV: We can vectorize this outer loop!
; CHECK: LV: Loop hints: force=?
; CHECK: LV: Found a loop: inner.body

; Case 2 input: doubly nested loop computing a[i*M+j] = b[i*M+j] * b[i*M+j].
; The outer-loop latch branch (in %outer.inc) carries !llvm.loop !9.
define void @case2(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  ; Bypass both loops entirely when N <= 0.
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  ; Values invariant in both loops: the inner-loop guard (M > 0), M
  ; sign-extended for the row offset, and the zero-extended trip counts.
  %cmp230 = icmp sgt i32 %M, 0
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %M to i64
  %wide.trip.count38 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  ; %indvars.iv35 is the outer induction variable i.
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  ; %1 = i * M, the row offset shared by every inner iteration.
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  ; %indvars.iv is the inner induction variable j; %2 = i*M + j.
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  ; Outer latch: the loop hints under test are attached to this branch.
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !9

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Case 3: Annotated outer loop WITH vector width and interleave information
; doesn't have to be collected.

; CHECK-LABEL: case3
; CHECK-NOT: LV: Loop hints: force=enabled
; CHECK-NOT: LV: We can vectorize this outer loop!
; CHECK: LV: Loop hints: force=?
; CHECK: LV: Found a loop: inner.body

; Case 3 input: doubly nested loop computing a[i*M+j] = b[i*M+j] * b[i*M+j].
; The outer-loop latch branch (in %outer.inc) carries !llvm.loop !11.
define void @case3(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  ; Bypass both loops entirely when N <= 0.
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  ; Values invariant in both loops: the inner-loop guard (M > 0), M
  ; sign-extended for the row offset, and the zero-extended trip counts.
  %cmp230 = icmp sgt i32 %M, 0
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %M to i64
  %wide.trip.count38 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  ; %indvars.iv35 is the outer induction variable i.
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  ; %1 = i * M, the row offset shared by every inner iteration.
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  ; %indvars.iv is the inner induction variable j; %2 = i*M + j.
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  ; Outer latch: the loop hints under test are attached to this branch.
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !11

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Case 4: Outer loop without any explicit vectorization annotation doesn't have
; to be collected.

; CHECK-LABEL: case4
; CHECK-NOT: LV: Loop hints: force=enabled
; CHECK-NOT: LV: We can vectorize this outer loop!
; CHECK: LV: Loop hints: force=?
; CHECK: LV: Found a loop: inner.body

; Case 4 input: doubly nested loop computing a[i*M+j] = b[i*M+j] * b[i*M+j].
; The outer-loop latch branch (in %outer.inc) carries no !llvm.loop metadata.
define void @case4(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  ; Bypass both loops entirely when N <= 0.
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  ; Values invariant in both loops: the inner-loop guard (M > 0), M
  ; sign-extended for the row offset, and the zero-extended trip counts.
  %cmp230 = icmp sgt i32 %M, 0
  %0 = sext i32 %M to i64
  %wide.trip.count = zext i32 %M to i64
  %wide.trip.count38 = zext i32 %N to i64
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  ; %indvars.iv35 is the outer induction variable i.
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  ; %1 = i * M, the row offset shared by every inner iteration.
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  ; %indvars.iv is the inner induction variable j; %2 = i*M + j.
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  ; Outer latch: deliberately left without loop hints in this case.
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  br i1 %exitcond39, label %for.end15, label %outer.body

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Module-level metadata referenced by the test functions above.
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 6.0.0"}
; !2 is the TBAA access tag used by every load/store of the "int" type (!3).
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
; Case 1: vectorize.width = 4 plus vectorize.enable = true.
!6 = distinct !{!6, !7, !8}
!7 = !{!"llvm.loop.vectorize.width", i32 4}
!8 = !{!"llvm.loop.vectorize.enable", i1 true}
; Case 2: vectorize.enable = true only (no width).
!9 = distinct !{!9, !8}
; Case 3: vectorize.width = 4, interleave.count = 2 and vectorize.enable = true.
!10 = !{!"llvm.loop.interleave.count", i32 2}
!11 = distinct !{!11, !7, !10, !8}