1; REQUIRES: asserts
2; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on        < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON
3; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED
4; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off       < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED
5; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED_MAXBW
6
7; Test that the MaxVF for the following loop, that has no dependence distances,
8; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16
9; (maximized bandwidth for i8 in the loop).
; @test0 computes a[i] = zext(b[i]) + c[i] for i in [0, 1024).
; Every access uses the same induction value and each array is reached
; through a different pointer argument, so no dependence distance limits the
; vectorization factor here.
define void @test0(i32* %a, i8* %b, i32* %c) #0 {
; CHECK: LV: Checking a loop in "test0"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 16
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: vscale x 16
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; i32 operand read from c[iv].
  %c.addr = getelementptr inbounds i32, i32* %c, i64 %iv
  %c.val = load i32, i32* %c.addr, align 4
  ; i8 operand read from b[iv]. The address advances one byte per iteration,
  ; so only byte alignment can be promised for it.
  %b.addr = getelementptr inbounds i8, i8* %b, i64 %iv
  %b.val = load i8, i8* %b.addr, align 1
  %zext = zext i8 %b.val to i32
  %add = add nsw i32 %zext, %c.val
  ; Result written to a[iv].
  %a.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  store i32 %add, i32* %a.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
40
; Test that the MaxVF for the following loop, with a dependence distance
; of 64 elements, is calculated as vscale x 4, i.e. 64 / (maxvscale = 16).
; @test1 computes a[i + 64] = zext(b[i]) + a[i] for i in [0, 1024).
; The store to a[i + 64] is read back 64 iterations later through a[i], which
; gives the loop a dependence distance of 64 i32 elements.
define void @test1(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test1"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; i32 operand read from a[iv].
  %a.ld.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  %a.val = load i32, i32* %a.ld.addr, align 4
  ; i8 operand read from b[iv]. The address advances one byte per iteration,
  ; so only byte alignment can be promised for it.
  %b.addr = getelementptr inbounds i8, i8* %b, i64 %iv
  %b.val = load i8, i8* %b.addr, align 1
  %zext = zext i8 %b.val to i32
  %add = add nsw i32 %zext, %a.val
  ; Result written 64 elements ahead of the element just read from %a.
  %iv.plus.dist = add nuw nsw i64 %iv, 64
  %a.st.addr = getelementptr inbounds i32, i32* %a, i64 %iv.plus.dist
  store i32 %add, i32* %a.st.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
74
; Test that the MaxVF for the following loop, with a dependence distance
; of 32 elements, is calculated as vscale x 2, i.e. 32 / (maxvscale = 16).
; @test2 computes a[i + 32] = zext(b[i]) + a[i] for i in [0, 1024).
; The store to a[i + 32] is read back 32 iterations later through a[i], which
; gives the loop a dependence distance of 32 i32 elements.
define void @test2(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test2"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; i32 operand read from a[iv].
  %a.ld.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  %a.val = load i32, i32* %a.ld.addr, align 4
  ; i8 operand read from b[iv]. The address advances one byte per iteration,
  ; so only byte alignment can be promised for it.
  %b.addr = getelementptr inbounds i8, i8* %b, i64 %iv
  %b.val = load i8, i8* %b.addr, align 1
  %zext = zext i8 %b.val to i32
  %add = add nsw i32 %zext, %a.val
  ; Result written 32 elements ahead of the element just read from %a.
  %iv.plus.dist = add nuw nsw i64 %iv, 32
  %a.st.addr = getelementptr inbounds i32, i32* %a, i64 %iv.plus.dist
  store i32 %add, i32* %a.st.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
108
; Test that the MaxVF for the following loop, with a dependence distance
; of 16 elements, is calculated as vscale x 1, i.e. 16 / (maxvscale = 16).
; @test3 computes a[i + 16] = zext(b[i]) + a[i] for i in [0, 1024).
; The store to a[i + 16] is read back 16 iterations later through a[i], which
; gives the loop a dependence distance of 16 i32 elements.
define void @test3(i32* %a, i8* %b) #0 {
; CHECK: LV: Checking a loop in "test3"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; i32 operand read from a[iv].
  %a.ld.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  %a.val = load i32, i32* %a.ld.addr, align 4
  ; i8 operand read from b[iv]. The address advances one byte per iteration,
  ; so only byte alignment can be promised for it.
  %b.addr = getelementptr inbounds i8, i8* %b, i64 %iv
  %b.val = load i8, i8* %b.addr, align 1
  %zext = zext i8 %b.val to i32
  %add = add nsw i32 %zext, %a.val
  ; Result written 16 elements ahead of the element just read from %a.
  %iv.plus.dist = add nuw nsw i64 %iv, 16
  %a.st.addr = getelementptr inbounds i32, i32* %a, i64 %iv.plus.dist
  store i32 %add, i32* %a.st.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
142
143; Test the fallback mechanism when scalable vectors are not feasible due
144; to e.g. dependence distance.
; @test4 computes a[i + 8] = b[i] + a[i] for i in [0, 1024), all on i32.
; The store lands 8 elements ahead of the load from %a, and every run
; configuration below expects no feasible scalable VF and a fixed VF of 4.
define void @test4(i32* %a, i32* %b) #0 {
; CHECK: LV: Checking a loop in "test4"
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 4
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  ; First addend: a[iv].
  %a.ld.addr = getelementptr inbounds i32, i32* %a, i64 %iv
  %a.val = load i32, i32* %a.ld.addr, align 4
  ; Second addend: b[iv].
  %b.addr = getelementptr inbounds i32, i32* %b, i64 %iv
  %b.val = load i32, i32* %b.addr, align 4
  %add = add nsw i32 %b.val, %a.val
  ; Sum is stored 8 elements ahead of the element just read from %a.
  %iv.plus.dist = add nuw nsw i64 %iv, 8
  %a.st.addr = getelementptr inbounds i32, i32* %a, i64 %iv.plus.dist
  store i32 %add, i32* %a.st.addr, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}
175
; Shared by every test function in this file. The upper bound of 16 is the
; "maxvscale" that the per-test comments divide the dependence distance by.
; The lower bound is 1 because the LangRef requires the vscale_range minimum
; to be greater than 0; a minimum of 0 is rejected by newer IR verifiers.
attributes #0 = { vscale_range(1, 16) }
177