; RUN: opt < %s -passes=loop-vectorize -enable-vplan-native-path -debug-only=loop-vectorize -S 2>&1 | FileCheck %s
; REQUIRES: asserts

; Verify that outer loops annotated only with the expected explicit
; vectorization hints are collected for vectorization instead of inner loops.

; Root C/C++ source code for all the test cases
; void foo(int *a, int *b, int N, int M)
; {
;   int i, j;
; #pragma clang loop vectorize(enable)
;   for (i = 0; i < N; i++) {
;     for (j = 0; j < M; j++) {
;       a[i*M+j] = b[i*M+j] * b[i*M+j];
;     }
;   }
; }

; Case 1: Annotated outer loop WITH vector width information must be collected.

; CHECK-LABEL: vector_width
; CHECK: LV: Loop hints: force=enabled width=4 interleave=0
; CHECK: LV: We can vectorize this outer loop!
; CHECK: LV: Using user VF 4 to build VPlans.
; CHECK-NOT: LV: Loop hints: force=?
; CHECK-NOT: LV: Found a loop: inner.body

; Module-wide data layout string; applies to every function in this file.
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

; @vector_width: two-level loop nest computing a[i*M+j] = b[i*M+j] * b[i*M+j].
; The outer latch branch carries !llvm.loop !6, i.e. the hints
; llvm.loop.vectorize.width = 4 and llvm.loop.vectorize.enable = true.
;   %a - destination array, stored through at index i*M+j
;   %b - source array, loaded from at index i*M+j
;   %N - outer bound; the function returns immediately when N <= 0
;   %M - inner bound; the inner loop is bypassed when M <= 0
define void @vector_width(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  ; Skip the whole loop nest unless N > 0.
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  ; Values that are invariant across both loops.
  %cmp230 = icmp sgt i32 %M, 0                    ; inner-loop guard (M > 0)
  %0 = sext i32 %M to i64                         ; M as i64, used for the i*M row offset
  %wide.trip.count = zext i32 %M to i64           ; inner exit bound
  %wide.trip.count38 = zext i32 %N to i64         ; outer exit bound
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  ; Outer induction variable i, starting at 0.
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  ; Row offset i*M, computed once per outer iteration.
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  ; Inner induction variable j, starting at 0.
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1                ; linear index i*M + j
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3                      ; square the loaded element
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  ; Outer latch: the loop hints are attached to this back-edge branch.
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !6

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Case 2: Annotated outer loop WITHOUT vector width information must be collected.

; CHECK-LABEL: case2
; CHECK: LV: Loop hints: force=enabled width=0 interleave=0
; CHECK: LV: We can vectorize this outer loop!
; CHECK: LV: Using VF 1 to build VPlans.

; @case2: same loop nest as the other cases in this file
; (a[i*M+j] = b[i*M+j] * b[i*M+j]). The outer latch branch carries
; !llvm.loop !9, which holds only llvm.loop.vectorize.enable = true
; (no width hint).
;   %a - destination array, stored through at index i*M+j
;   %b - source array, loaded from at index i*M+j
;   %N - outer bound; the function returns immediately when N <= 0
;   %M - inner bound; the inner loop is bypassed when M <= 0
define void @case2(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  ; Skip the whole loop nest unless N > 0.
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  ; Values that are invariant across both loops.
  %cmp230 = icmp sgt i32 %M, 0                    ; inner-loop guard (M > 0)
  %0 = sext i32 %M to i64                         ; M as i64, used for the i*M row offset
  %wide.trip.count = zext i32 %M to i64           ; inner exit bound
  %wide.trip.count38 = zext i32 %N to i64         ; outer exit bound
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  ; Outer induction variable i, starting at 0.
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  ; Row offset i*M, computed once per outer iteration.
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  ; Inner induction variable j, starting at 0.
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1                ; linear index i*M + j
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3                      ; square the loaded element
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  ; Outer latch: the loop hint is attached to this back-edge branch.
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !9

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Case 3: Annotated outer loop WITH vector width and interleave information
; doesn't have to be collected.

; CHECK-LABEL: case3
; CHECK-NOT: LV: Loop hints: force=enabled
; CHECK-NOT: LV: We can vectorize this outer loop!
; CHECK: LV: Loop hints: force=?
; CHECK: LV: Found a loop: inner.body

; @case3: same loop nest as the other cases in this file
; (a[i*M+j] = b[i*M+j] * b[i*M+j]). The outer latch branch carries
; !llvm.loop !11, which combines llvm.loop.vectorize.width = 4,
; llvm.loop.interleave.count = 2 and llvm.loop.vectorize.enable = true.
;   %a - destination array, stored through at index i*M+j
;   %b - source array, loaded from at index i*M+j
;   %N - outer bound; the function returns immediately when N <= 0
;   %M - inner bound; the inner loop is bypassed when M <= 0
define void @case3(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  ; Skip the whole loop nest unless N > 0.
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  ; Values that are invariant across both loops.
  %cmp230 = icmp sgt i32 %M, 0                    ; inner-loop guard (M > 0)
  %0 = sext i32 %M to i64                         ; M as i64, used for the i*M row offset
  %wide.trip.count = zext i32 %M to i64           ; inner exit bound
  %wide.trip.count38 = zext i32 %N to i64         ; outer exit bound
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  ; Outer induction variable i, starting at 0.
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  ; Row offset i*M, computed once per outer iteration.
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  ; Inner induction variable j, starting at 0.
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1                ; linear index i*M + j
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3                      ; square the loaded element
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  ; Outer latch: the loop hints are attached to this back-edge branch.
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  br i1 %exitcond39, label %for.end15, label %outer.body, !llvm.loop !11

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Case 4: Outer loop without any explicit vectorization annotation doesn't have
; to be collected.

; CHECK-LABEL: case4
; CHECK-NOT: LV: Loop hints: force=enabled
; CHECK-NOT: LV: We can vectorize this outer loop!
; CHECK: LV: Loop hints: force=?
; CHECK: LV: Found a loop: inner.body

; @case4: same loop nest as the other cases in this file
; (a[i*M+j] = b[i*M+j] * b[i*M+j]). Unlike the other cases, the outer latch
; branch carries no !llvm.loop metadata, so the outer loop has no explicit
; vectorization hints.
;   %a - destination array, stored through at index i*M+j
;   %b - source array, loaded from at index i*M+j
;   %N - outer bound; the function returns immediately when N <= 0
;   %M - inner bound; the inner loop is bypassed when M <= 0
define void @case4(i32* nocapture %a, i32* nocapture readonly %b, i32 %N, i32 %M) local_unnamed_addr {
entry:
  ; Skip the whole loop nest unless N > 0.
  %cmp32 = icmp sgt i32 %N, 0
  br i1 %cmp32, label %outer.ph, label %for.end15

outer.ph:                                         ; preds = %entry
  ; Values that are invariant across both loops.
  %cmp230 = icmp sgt i32 %M, 0                    ; inner-loop guard (M > 0)
  %0 = sext i32 %M to i64                         ; M as i64, used for the i*M row offset
  %wide.trip.count = zext i32 %M to i64           ; inner exit bound
  %wide.trip.count38 = zext i32 %N to i64         ; outer exit bound
  br label %outer.body

outer.body:                                       ; preds = %outer.inc, %outer.ph
  ; Outer induction variable i, starting at 0.
  %indvars.iv35 = phi i64 [ 0, %outer.ph ], [ %indvars.iv.next36, %outer.inc ]
  br i1 %cmp230, label %inner.ph, label %outer.inc

inner.ph:                                         ; preds = %outer.body
  ; Row offset i*M, computed once per outer iteration.
  %1 = mul nsw i64 %indvars.iv35, %0
  br label %inner.body

inner.body:                                       ; preds = %inner.body, %inner.ph
  ; Inner induction variable j, starting at 0.
  %indvars.iv = phi i64 [ 0, %inner.ph ], [ %indvars.iv.next, %inner.body ]
  %2 = add nsw i64 %indvars.iv, %1                ; linear index i*M + j
  %arrayidx = getelementptr inbounds i32, i32* %b, i64 %2
  %3 = load i32, i32* %arrayidx, align 4, !tbaa !2
  %mul8 = mul nsw i32 %3, %3                      ; square the loaded element
  %arrayidx12 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %mul8, i32* %arrayidx12, align 4, !tbaa !2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, %wide.trip.count
  br i1 %exitcond, label %outer.inc, label %inner.body

outer.inc:                                        ; preds = %inner.body, %outer.body
  ; Outer latch: deliberately left without loop metadata in this case.
  %indvars.iv.next36 = add nuw nsw i64 %indvars.iv35, 1
  %exitcond39 = icmp eq i64 %indvars.iv.next36, %wide.trip.count38
  br i1 %exitcond39, label %for.end15, label %outer.body

for.end15:                                        ; preds = %outer.inc, %entry
  ret void
}

; Module-level named metadata.
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{!"clang version 6.0.0"}
; TBAA nodes: !2 is the access tag attached to the i32 loads and stores
; (!tbaa !2); it refers to the "int" type node !3, whose parent chain is
; "omnipotent char" (!4) and the root "Simple C/C++ TBAA" (!5).
!2 = !{!3, !3, i64 0}
!3 = !{!"int", !4, i64 0}
!4 = !{!"omnipotent char", !5, i64 0}
!5 = !{!"Simple C/C++ TBAA"}
; Case 1
; Loop ID for the outer loop of @vector_width: width hint (!7) + enable (!8).
!6 = distinct !{!6, !7, !8}
!7 = !{!"llvm.loop.vectorize.width", i32 4}
!8 = !{!"llvm.loop.vectorize.enable", i1 true}
; Case 2
; Loop ID for the outer loop of @case2: enable hint (!8) only.
!9 = distinct !{!9, !8}
; Case 3
; Loop ID for the outer loop of @case3: width (!7) + interleave count (!10)
; + enable (!8).
!10 = !{!"llvm.loop.interleave.count", i32 2}
!11 = distinct !{!11, !7, !10, !8}
237