1; REQUIRES: asserts 2; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON 3; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED 4; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED 5; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED_MAXBW 6 7; Test that the MaxVF for the following loop, that has no dependence distances, 8; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16 9; (maximized bandwidth for i8 in the loop). 
define void @test0(i32* %a, i8* %b, i32* %c) {
; CHECK: LV: Checking a loop in "test0"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 16
entry:
  br label %loop

; Computes a[iv] = zext(b[iv]) + c[iv] for iv = 0..1023. The store and both
; loads use the same index but three different base pointers, so the indices
; themselves encode no dependence distance.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
  ; Byte-sized elements at consecutive indices cannot all be 4-byte aligned,
  ; so the i8 load only claims alignment 1 (overstating alignment is UB).
  %1 = load i8, i8* %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  ; Leave the loop once 1024 iterations have run.
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 64 elements, is calculated as (maxvscale = 16) * 4.
define void @test1(i32* %a, i8* %b) {
; CHECK: LV: Checking a loop in "test1"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 4
entry:
  br label %loop

; Computes a[iv + 64] = zext(b[iv]) + a[iv] for iv = 0..1023, i.e. the store
; lands 64 i32 elements ahead of the load from the same array.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
  ; Byte-sized elements at consecutive indices cannot all be 4-byte aligned,
  ; so the i8 load only claims alignment 1 (overstating alignment is UB).
  %1 = load i8, i8* %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; Store index is the load index plus 64.
  %2 = add nuw nsw i64 %iv, 64
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  ; Leave the loop once 1024 iterations have run.
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 32 elements, is calculated as (maxvscale = 16) * 2.
define void @test2(i32* %a, i8* %b) {
; CHECK: LV: Checking a loop in "test2"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 2
entry:
  br label %loop

; Computes a[iv + 32] = zext(b[iv]) + a[iv] for iv = 0..1023, i.e. the store
; lands 32 i32 elements ahead of the load from the same array.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
  ; Byte-sized elements at consecutive indices cannot all be 4-byte aligned,
  ; so the i8 load only claims alignment 1 (overstating alignment is UB).
  %1 = load i8, i8* %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; Store index is the load index plus 32.
  %2 = add nuw nsw i64 %iv, 32
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  ; Leave the loop once 1024 iterations have run.
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 16 elements, is calculated as (maxvscale = 16) * 1.
define void @test3(i32* %a, i8* %b) {
; CHECK: LV: Checking a loop in "test3"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 1
entry:
  br label %loop

; Computes a[iv + 16] = zext(b[iv]) + a[iv] for iv = 0..1023, i.e. the store
; lands 16 i32 elements ahead of the load from the same array.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
  ; Byte-sized elements at consecutive indices cannot all be 4-byte aligned,
  ; so the i8 load only claims alignment 1 (overstating alignment is UB).
  %1 = load i8, i8* %arrayidx2, align 1
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; Store index is the load index plus 16.
  %2 = add nuw nsw i64 %iv, 16
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  ; Leave the loop once 1024 iterations have run.
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test the fallback mechanism when scalable vectors are not feasible due
; to e.g. dependence distance.
define void @test4(i32* %a, i32* %b) {
; CHECK: LV: Checking a loop in "test4"
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED_MAXBW-NOT: LV: Found feasible scalable VF
start:
  br label %body

; Each iteration performs a[idx + 8] = b[idx] + a[idx], for idx = 0..1023.
; The write is only 8 i32 elements ahead of the read from the same array, and
; no run configuration expects a feasible scalable VF to be reported.
body:
  %idx = phi i64 [ 0, %start ], [ %idx.next, %body ]
  ; Address arithmetic for the read of a[idx] and the write of a[idx + 8].
  %a.src = getelementptr inbounds i32, i32* %a, i64 %idx
  %idx.ahead = add nuw nsw i64 %idx, 8
  %a.dst = getelementptr inbounds i32, i32* %a, i64 %idx.ahead
  %a.val = load i32, i32* %a.src, align 4
  %b.src = getelementptr inbounds i32, i32* %b, i64 %idx
  %b.val = load i32, i32* %b.src, align 4
  %sum = add nsw i32 %b.val, %a.val
  store i32 %sum, i32* %a.dst, align 4
  %idx.next = add nuw nsw i64 %idx, 1
  ; Leave the loop once 1024 iterations have run.
  %done = icmp eq i64 %idx.next, 1024
  br i1 %done, label %finish, label %body

finish:
  ret void
}