1; REQUIRES: asserts 2; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=on < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_ON 3; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED 4; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -scalable-vectorization=off < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_DISABLED 5; RUN: opt -mtriple=aarch64-none-linux-gnu -mattr=+sve -force-target-instruction-cost=1 -loop-vectorize -S -debug-only=loop-vectorize -vectorizer-maximize-bandwidth -scalable-vectorization=preferred < %s 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK_SCALABLE_PREFERRED_MAXBW 6 7; Test that the MaxVF for the following loop, that has no dependence distances, 8; is calculated as vscale x 4 (max legal SVE vector size) or vscale x 16 9; (maximized bandwidth for i8 in the loop). 
define void @test0(i32* %a, i8* %b, i32* %c) #0 {
; Expected debug output per -scalable-vectorization mode (see the run lines
; at the top of the file):
;   on                    - vscale x 4 is reported feasible, fixed VF 4 is selected
;   preferred             - vscale x 4 is reported feasible and selected
;   off                   - no feasible scalable VF is reported, VF 4 is selected
;   preferred + max. b/w  - vscale x 16 is reported feasible and selected
; CHECK: LV: Checking a loop in "test0"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 16
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: vscale x 16
entry:
  br label %loop

; Loop body: a[iv] = zext(b[iv]) + c[iv], for iv = 0 .. 1023.
; Every access uses the same index %iv, so no access is offset from another.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %c, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
  ; The i8 load is the narrowest type in the loop; all other accesses are i32.
  %1 = load i8, i8* %arrayidx2, align 4
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %iv
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  ; Constant trip count of 1024 iterations.
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 64 elements, is calculated as vscale x 4, i.e. (maxvscale = 16) * 4 = 64
; elements.
define void @test1(i32* %a, i8* %b) #0 {
; In every mode that enables scalable vectorization, vscale x 4 is expected to
; be reported as feasible (it is not widened by the maximize-bandwidth run).
; It is only selected with -scalable-vectorization=preferred when bandwidth is
; not maximized; the other runs are expected to select a fixed VF.
; CHECK: LV: Checking a loop in "test1"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: vscale x 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
entry:
  br label %loop

; Loop body: a[iv + 64] = zext(b[iv]) + a[iv], for iv = 0 .. 1023.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
  %1 = load i8, i8* %arrayidx2, align 4
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; The store goes to %a at index iv + 64 while the load above reads %a at
  ; index iv: this offset is the 64-element dependence distance under test.
  %2 = add nuw nsw i64 %iv, 64
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 32 elements, is calculated as vscale x 2, i.e. (maxvscale = 16) * 2 = 32
; elements.
define void @test2(i32* %a, i8* %b) #0 {
; vscale x 2 is expected to be reported as feasible in every mode that enables
; scalable vectorization, but every run is expected to select a fixed VF.
; CHECK: LV: Checking a loop in "test2"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 2
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
entry:
  br label %loop

; Loop body: a[iv + 32] = zext(b[iv]) + a[iv], for iv = 0 .. 1023.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
  %1 = load i8, i8* %arrayidx2, align 4
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; The store goes to %a at index iv + 32 while the load above reads %a at
  ; index iv: this offset is the 32-element dependence distance under test.
  %2 = add nuw nsw i64 %iv, 32
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test that the MaxVF for the following loop, with a dependence distance
; of 16 elements, is calculated as vscale x 1, i.e. (maxvscale = 16) * 1 = 16
; elements.
define void @test3(i32* %a, i8* %b) #0 {
; vscale x 1 is expected to be reported as feasible in every mode that enables
; scalable vectorization, but every run is expected to select a fixed VF.
; CHECK: LV: Checking a loop in "test3"
; CHECK_SCALABLE_ON: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Found feasible scalable VF = vscale x 1
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 16
entry:
  br label %loop

; Loop body: a[iv + 16] = zext(b[iv]) + a[iv], for iv = 0 .. 1023.
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i8, i8* %b, i64 %iv
  %1 = load i8, i8* %arrayidx2, align 4
  %zext = zext i8 %1 to i32
  %add = add nsw i32 %zext, %0
  ; The store goes to %a at index iv + 16 while the load above reads %a at
  ; index iv: this offset is the 16-element dependence distance under test.
  %2 = add nuw nsw i64 %iv, 16
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Test the fallback mechanism when scalable vectors are not feasible due
; to e.g. dependence distance.
define void @test4(i32* %a, i32* %b) #0 {
; No run is expected to report a feasible scalable VF for this loop; every
; run is expected to select the fixed VF 4.
; CHECK: LV: Checking a loop in "test4"
; CHECK_SCALABLE_ON-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_ON: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED: LV: Selecting VF: 4
; CHECK_SCALABLE_DISABLED-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_DISABLED: LV: Selecting VF: 4
; CHECK_SCALABLE_PREFERRED_MAXBW-NOT: LV: Found feasible scalable VF
; CHECK_SCALABLE_PREFERRED_MAXBW: LV: Selecting VF: 4
entry:
  br label %loop

; Loop body: a[iv + 8] = b[iv] + a[iv], for iv = 0 .. 1023 (all i32, no i8).
loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %arrayidx = getelementptr inbounds i32, i32* %a, i64 %iv
  %0 = load i32, i32* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
  %1 = load i32, i32* %arrayidx2, align 4
  %add = add nsw i32 %1, %0
  ; The store goes to %a at index iv + 8 while the load above reads %a at
  ; index iv, i.e. a dependence distance of 8 elements. Presumably this is
  ; too short for any scalable VF because vscale x 1 at maxvscale = 16 would
  ; already span 16 elements -- TODO confirm against the vectorizer's
  ; legality computation.
  %2 = add nuw nsw i64 %iv, 8
  %arrayidx5 = getelementptr inbounds i32, i32* %a, i64 %2
  store i32 %add, i32* %arrayidx5, align 4
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, 1024
  br i1 %exitcond.not, label %exit, label %loop

exit:
  ret void
}

; Attribute group referenced as #0 by every test function in this file.
; The second vscale_range operand (16) is the "maxvscale = 16" that the test
; descriptions refer to.
attributes #0 = { vscale_range(0, 16) }