; Checks the vectorization factor (VF) and interleave count (UF) that the loop
; vectorizer reports in its debug output. The first run line targets
; powerpc64 with -mcpu=pwr8, the second powerpc64le with -mcpu=pwr9. Both
; enable -vectorizer-maximize-bandwidth and redirect stderr into the stream
; that FileCheck reads. Each run uses the common prefix plus one CPU-specific
; prefix.
; NOTE(review): the asserts requirement is presumably there because the
; -debug-only output is not produced by builds without assertions -- confirm.
; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=powerpc64-unknown-linux -S -mcpu=pwr8 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
; RUN: opt < %s -debug-only=loop-vectorize -loop-vectorize -vectorizer-maximize-bandwidth -O2 -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9
; REQUIRES: asserts

; Byte arrays read by @foo and @goo.
@a = global [1024 x i8] zeroinitializer, align 16
@b = global [1024 x i8] zeroinitializer, align 16

; Returns the sum of |a[i] - b[i]| for i in [0, 1024), with each byte
; zero-extended to i32 before the subtraction. The expected plan depends on
; the CPU: VF=16, UF=4 for the pwr8 run and VF=8, UF=8 for the pwr9 run.
define i32 @foo() {
; CHECK-LABEL: foo

; CHECK-PWR8: Setting best plan to VF=16, UF=4

; CHECK-PWR9: Setting best plan to VF=8, UF=8


entry:
  br label %for.body

for.cond.cleanup:
  ; Single-entry phi that carries the final reduction value out of the loop.
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa

for.body:
  ; %indvars.iv is the loop counter; %s.015 is the running sum.
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
  %1 = load i8, i8* %arrayidx2, align 1
  %conv3 = zext i8 %1 to i32
  %sub = sub nsw i32 %conv, %conv3
  ; Absolute value: keep %sub when it is >= 0, otherwise use its negation.
  %ispos = icmp sgt i32 %sub, -1
  %neg = sub nsw i32 0, %sub
  %2 = select i1 %ispos, i32 %sub, i32 %neg
  %add = add nsw i32 %2, %s.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Same absolute-difference reduction as @foo, except that the two loads are
; indexed by %indvars.iv + 3 and %indvars.iv + 2 rather than by %indvars.iv
; directly. Both runs expect VF=16, UF=4.
define i32 @goo() {
; For indvars.iv used in a computation chain that only feeds into getelementptr
; or cmp, it will not have a vector version, and the vector register usage will
; not exceed the available vector register number.

; CHECK-LABEL: goo

; CHECK: Setting best plan to VF=16, UF=4

entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  ; Offset index used only to address @a.
  %tmp1 = add nsw i64 %indvars.iv, 3
  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %tmp1
  %tmp = load i8, i8* %arrayidx, align 1
  %conv = zext i8 %tmp to i32
  ; Offset index used only to address @b.
  %tmp2 = add nsw i64 %indvars.iv, 2
  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %tmp2
  %tmp3 = load i8, i8* %arrayidx2, align 1
  %conv3 = zext i8 %tmp3 to i32
  %sub = sub nsw i32 %conv, %conv3
  ; Absolute value: keep %sub when it is >= 0, otherwise use its negation.
  %ispos = icmp sgt i32 %sub, -1
  %neg = sub nsw i32 0, %sub
  %tmp4 = select i1 %ispos, i32 %sub, i32 %neg
  %add = add nsw i32 %tmp4, %s.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; For i in [0, 1024): adds i to the i64 element a[i], stores the result back
; to a[i], and accumulates the stored values. Returns the accumulated sum.
; Both runs expect VF=2, UF=12.
define i64 @bar(i64* nocapture %a) {
; CHECK-LABEL: bar

; CHECK: Setting best plan to VF=2, UF=12

entry:
  br label %for.body

for.cond.cleanup:
  %add2.lcssa = phi i64 [ %add2, %for.body ]
  ret i64 %add2.lcssa

for.body:
  ; %i.012 is the loop counter; %s.011 is the running sum.
  %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
  %0 = load i64, i64* %arrayidx, align 8
  ; The counter is used as a data operand here, not only for addressing.
  %add = add nsw i64 %0, %i.012
  store i64 %add, i64* %arrayidx, align 8
  %add2 = add nsw i64 %add, %s.011
  %inc = add nuw nsw i64 %i.012, 1
  %exitcond = icmp eq i64 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; External arrays accessed by @hoo.
@d = external global [0 x i64], align 8
@e = external global [0 x i32], align 4
@c = external global [0 x i32], align 4

; For i in [0, 10000): loads an i64 index from d[i], loads the i32 at e[index],
; and stores it to c[i]. The address of the load from @e therefore depends on
; a value loaded in the same iteration. The %n argument is not used by the
; body. Both runs expect VF=1, UF=12.
define void @hoo(i32 %n) {
; CHECK-LABEL: hoo
; CHECK: Setting best plan to VF=1, UF=12

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds [0 x i64], [0 x i64]* @d, i64 0, i64 %indvars.iv
  ; %tmp is the data-dependent index used to address @e below.
  %tmp = load i64, i64* %arrayidx, align 8
  %arrayidx1 = getelementptr inbounds [0 x i32], [0 x i32]* @e, i64 0, i64 %tmp
  %tmp1 = load i32, i32* %arrayidx1, align 4
  %arrayidx3 = getelementptr inbounds [0 x i32], [0 x i32]* @c, i64 0, i64 %indvars.iv
  store i32 %tmp1, i32* %arrayidx3, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}