; REQUIRES: asserts
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on        < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off       < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED_MAXBW

; Test that the MaxVF for the following loop, which has no dependence
; distances, is calculated as vscale x 4 (max legal SVE vector size) or
; vscale x 16 (maximized bandwidth for i8 in the loop).
define void @test0(i32* %a, i8* %b, i32* %c) {
; CHECK: LV: Checking a loop in "test0"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 16
entry:
  br label %loop

; Computes a[iv] = zext(b[iv]) + c[iv] for iv in [0, 1024).
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
  ; Consecutive i8 elements can only be assumed to be byte aligned.
  %1 = load i8, i8* %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 64 elements, is calculated as vscale x 4, i.e. (maxvscale = 16) * 4 = 64.
define void @test1(i32* %a, i8* %b) {
; CHECK: LV: Checking a loop in "test1"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 4
entry:
  br label %loop

; Computes a[iv + 64] = zext(b[iv]) + a[iv] for iv in [0, 1024).
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
  ; Consecutive i8 elements can only be assumed to be byte aligned.
  %1 = load i8, i8* %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; The store is 64 i32 elements ahead of the load from %a.
  %2 = add nuw nsw i64 %iv, 64
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 32 elements, is calculated as vscale x 2, i.e. (maxvscale = 16) * 2 = 32.
define void @test2(i32* %a, i8* %b) {
; CHECK: LV: Checking a loop in "test2"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 2
entry:
  br label %loop

; Computes a[iv + 32] = zext(b[iv]) + a[iv] for iv in [0, 1024).
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
  ; Consecutive i8 elements can only be assumed to be byte aligned.
  %1 = load i8, i8* %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; The store is 32 i32 elements ahead of the load from %a.
  %2 = add nuw nsw i64 %iv, 32
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 16 elements, is calculated as vscale x 1, i.e. (maxvscale = 16) * 1 = 16.
define void @test3(i32* %a, i8* %b) {
; CHECK: LV: Checking a loop in "test3"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 1
entry:
  br label %loop

; Computes a[iv + 16] = zext(b[iv]) + a[iv] for iv in [0, 1024).
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
  ; Consecutive i8 elements can only be assumed to be byte aligned.
  %1 = load i8, i8* %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; The store is 16 i32 elements ahead of the load from %a.
  %2 = add nuw nsw i64 %iv, 16
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test the fallback mechanism when scalable vectors are not feasible due
; to e.g. dependence distance. The loop below has a dependence distance of
; 8 elements, and no feasible scalable VF is expected to be reported in any
; of the tested configurations.
define void @test4(i32* %a, i32* %b) {
; CHECK: LV: Checking a loop in "test4"
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED_MAXBW-NOT: LV: Found feasible scalable VF
entry:
  br label %loop

; Computes a[iv + 8] = b[iv] + a[iv] for iv in [0, 1024).
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
  %1 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  ; The store is 8 i32 elements ahead of the load from %a.
  %2 = add nuw nsw i64 %iv, 8
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}