; RUN: opt < %s -debug-only=loop-vectorize -passes='function(loop-vectorize),default<O2>' -vectorizer-maximize-bandwidth -mtriple=powerpc64-unknown-linux -S -mcpu=pwr8 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
; RUN: opt < %s -debug-only=loop-vectorize -passes='function(loop-vectorize),default<O2>' -vectorizer-maximize-bandwidth -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9
; REQUIRES: asserts

; Loop-vectorizer register-usage test for PowerPC.
;
; Both invocations above run the loop vectorizer with bandwidth maximisation
; enabled and capture its debug log (stderr is merged into stdout), once for
; pwr8 and once for pwr9. The checks below match either the plan the
; vectorizer finally executes (VF / UF) or its per-register-class usage
; estimate ("LV(REG)" lines). The debug log is presumably only available in
; builds with assertions enabled, hence the asserts requirement above.

@a = global [1024 x i8] zeroinitializer, align 16
@b = global [1024 x i8] zeroinitializer, align 16

; Sums |a[i] - b[i]| over the 1024 bytes of @a and @b, with each byte
; zero-extended to i32 before the subtraction. The expected plan differs
; between the two CPUs.
define i32 @foo() {
; CHECK-LABEL: foo

; CHECK-PWR8: Executing best plan with VF=16, UF=4

; CHECK-PWR9: Executing best plan with VF=8, UF=8


entry:
  br label %for.body

for.cond.cleanup:
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
  %1 = load i8, i8* %arrayidx2, align 1
  %conv3 = zext i8 %1 to i32
  %sub = sub nsw i32 %conv, %conv3
  ; Absolute value: keep %sub when it is non-negative, otherwise negate it.
  %ispos = icmp sgt i32 %sub, -1
  %neg = sub nsw i32 0, %sub
  %2 = select i1 %ispos, i32 %sub, i32 %neg
  %add = add nsw i32 %2, %s.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; Same absolute-difference reduction as @foo, except that the induction
; variable is offset (by 3 for @a, by 2 for @b) before it is used as an index.
define i32 @goo() {
; For indvars.iv used in a computation chain only feeding into getelementptr or cmp,
; it will not have vector version and the vector register usage will not exceed the
; available vector register number.

; CHECK-LABEL: goo

; CHECK: Executing best plan with VF=16, UF=4

entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %tmp1 = add nsw i64 %indvars.iv, 3
  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %tmp1
  %tmp = load i8, i8* %arrayidx, align 1
  %conv = zext i8 %tmp to i32
  %tmp2 = add nsw i64 %indvars.iv, 2
  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %tmp2
  %tmp3 = load i8, i8* %arrayidx2, align 1
  %conv3 = zext i8 %tmp3 to i32
  %sub = sub nsw i32 %conv, %conv3
  ; Absolute value of %sub, as in @foo.
  %ispos = icmp sgt i32 %sub, -1
  %neg = sub nsw i32 0, %sub
  %tmp4 = select i1 %ispos, i32 %sub, i32 %neg
  %add = add nsw i32 %tmp4, %s.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; For 1024 i64 elements: a[i] += i, and the updated element is accumulated
; into the returned sum.
define i64 @bar(i64* nocapture %a) {
; CHECK-LABEL: bar

; CHECK: Executing best plan with VF=2, UF=12

entry:
  br label %for.body

for.cond.cleanup:
  %add2.lcssa = phi i64 [ %add2, %for.body ]
  ret i64 %add2.lcssa

for.body:
  %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
  %0 = load i64, i64* %arrayidx, align 8
  %add = add nsw i64 %0, %i.012
  store i64 %add, i64* %arrayidx, align 8
  %add2 = add nsw i64 %add, %s.011
  %inc = add nuw nsw i64 %i.012, 1
  %exitcond = icmp eq i64 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

@d = external global [0 x i64], align 8
@e = external global [0 x i32], align 4
@c = external global [0 x i32], align 4

; Indirect copy c[i] = e[d[i]] for 10000 iterations; the index into @e is
; itself loaded from @d. The %n argument is not used by the body.
define void @hoo(i32 %n) {
; CHECK-LABEL: hoo
; CHECK: Executing best plan with VF=1, UF=12

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds [0 x i64], [0 x i64]* @d, i64 0, i64 %indvars.iv
  %tmp = load i64, i64* %arrayidx, align 8
  %arrayidx1 = getelementptr inbounds [0 x i32], [0 x i32]* @e, i64 0, i64 %tmp
  %tmp1 = load i32, i32* %arrayidx1, align 4
  %arrayidx3 = getelementptr inbounds [0 x i32], [0 x i32]* @c, i64 0, i64 %indvars.iv
  store i32 %tmp1, i32* %arrayidx3, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; Fast-math float reduction s = (a[i] + s) + b[i], where the induction
; variable advances by 32 per iteration and the loop is skipped when n <= 0.
; The checks cover the scalar (VF = 1) register-usage estimate.
define float @float_(float* nocapture readonly %a, float* nocapture readonly %b, i32 %n) {
;CHECK-LABEL: float_
;CHECK: LV(REG): VF = 1
;CHECK: LV(REG): Found max usage: 2 item
;CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 3 registers
;CHECK: LV(REG): Found invariant usage: 1 item
;CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers

entry:
  %cmp = icmp sgt i32 %n, 0
  br i1 %cmp, label %preheader, label %for.end

preheader:
  %t0 = sext i32 %n to i64
  br label %for

for:
  %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for ]
  %s.02 = phi float [ 0.0, %preheader ], [ %add4, %for ]
  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
  %t1 = load float, float* %arrayidx, align 4
  %arrayidx3 = getelementptr inbounds float, float* %b, i64 %indvars.iv
  %t2 = load float, float* %arrayidx3, align 4
  %add = fadd fast float %t1, %s.02
  %add4 = fadd fast float %add, %t2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32
  %cmp1 = icmp slt i64 %indvars.iv.next, %t0
  br i1 %cmp1, label %for, label %loopexit

loopexit:
  %add4.lcssa = phi float [ %add4, %for ]
  br label %for.end

for.end:
  %s.0.lcssa = phi float [ 0.0, %entry ], [ %add4.lcssa, %loopexit ]
  ret float %s.0.lcssa
}


; Applies a long chain of double-precision arithmetic to A[i] in place. The
; induction variable starts at sext(%n) and is decremented each iteration; the
; loop exits once the truncated (pre-decrement) index equals 0. The checked
; register-usage estimate differs between the two CPUs.
define void @double_(double* nocapture %A, i32 %n) nounwind uwtable ssp {
;CHECK-LABEL: double_
;CHECK-PWR8: LV(REG): VF = 2
;CHECK-PWR8: LV(REG): Found max usage: 2 item
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
;CHECK-PWR8: LV(REG): Found invariant usage: 1 item
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 1 registers

;CHECK-PWR9: LV(REG): VF = 1
;CHECK-PWR9: LV(REG): Found max usage: 2 item
;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
;CHECK-PWR9: LV(REG): Found invariant usage: 1 item
;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers

  %1 = sext i32 %n to i64
  br label %2

; <label>:2                                       ; preds = %2, %0
  %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
  %3 = getelementptr inbounds double, double* %A, i64 %indvars.iv
  %4 = load double, double* %3, align 8
  %5 = fadd double %4, 3.000000e+00
  %6 = fmul double %4, 2.000000e+00
  %7 = fadd double %5, %6
  %8 = fadd double %7, 2.000000e+00
  %9 = fmul double %8, 5.000000e-01
  %10 = fadd double %6, %9
  %11 = fsub double %10, %5
  %12 = fadd double %4, %11
  %13 = fdiv double %8, %12
  %14 = fmul double %13, %8
  %15 = fmul double %6, %14
  %16 = fmul double %5, %15
  %17 = fadd double %16, -3.000000e+00
  %18 = fsub double %4, %5
  %19 = fadd double %6, %18
  %20 = fadd double %13, %19
  %21 = fadd double %20, %17
  %22 = fadd double %21, 3.000000e+00
  %23 = fmul double %4, %22
  store double %23, double* %3, align 8
  %indvars.iv.next = add i64 %indvars.iv, -1
  %24 = trunc i64 %indvars.iv to i32
  %25 = icmp eq i32 %24, 0
  br i1 %25, label %26, label %2

; <label>:26                                      ; preds = %2
  ret void
}

; Fast-math ppc_fp128 reduction x = x - n[i] over 2048 elements, starting
; from %d; the final difference is returned.
define ppc_fp128 @fp128_(ppc_fp128* nocapture %n, ppc_fp128 %d) nounwind readonly {
;CHECK-LABEL: fp128_
;CHECK: LV(REG): VF = 1
;CHECK: LV(REG): Found max usage: 2 item
;CHECK: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK: LV(REG): RegisterClass: PPC::VRRC, 2 registers
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %x.05 = phi ppc_fp128 [ %d, %entry ], [ %sub, %for.body ]
  %arrayidx = getelementptr inbounds ppc_fp128, ppc_fp128* %n, i32 %i.06
  %0 = load ppc_fp128, ppc_fp128* %arrayidx, align 8
  %sub = fsub fast ppc_fp128 %x.05, %0
  %inc = add nsw i32 %i.06, 1
  %exitcond = icmp eq i32 %inc, 2048
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret ppc_fp128 %sub
}


; Scales half-precision values two at a time: each iteration multiplies
; pIn[0] and pIn[1] by a scale factor, stores the products to pOut[0] and
; pOut[1], and advances both pointers by two elements. The scale is the low
; 16 bits of %scale.coerce reinterpreted as a half; the trip count is
; (numCols * numRows) >> 2, and the loop is skipped when that is zero.
; NOTE(review): attribute group #0 has no definition in the visible part of
; this file - confirm whether one is expected.
define void @fp16_(half* nocapture readonly %pIn, half* nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 {
;CHECK-LABEL: fp16_
;CHECK: LV(REG): VF = 1
;CHECK: LV(REG): Found max usage: 2 item
;CHECK: LV(REG): RegisterClass: PPC::GPRRC, 4 registers
;CHECK: LV(REG): RegisterClass: PPC::VSXRC, 2 registers
entry:
  %tmp.0.extract.trunc = trunc i32 %scale.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %mul = mul i32 %numCols, %numRows
  %shr = lshr i32 %mul, 2
  %cmp26 = icmp eq i32 %shr, 0
  br i1 %cmp26, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  %pIn.addr.029 = phi half* [ %add.ptr, %while.body ], [ %pIn, %entry ]
  %pOut.addr.028 = phi half* [ %add.ptr7, %while.body ], [ %pOut, %entry ]
  %blkCnt.027 = phi i32 [ %dec, %while.body ], [ %shr, %entry ]
  %1 = load half, half* %pIn.addr.029, align 2
  %arrayidx2 = getelementptr inbounds half, half* %pIn.addr.029, i32 1
  %2 = load half, half* %arrayidx2, align 2
  %mul3 = fmul half %1, %0
  %mul4 = fmul half %2, %0
  store half %mul3, half* %pOut.addr.028, align 2
  %arrayidx6 = getelementptr inbounds half, half* %pOut.addr.028, i32 1
  store half %mul4, half* %arrayidx6, align 2
  %add.ptr = getelementptr inbounds half, half* %pIn.addr.029, i32 2
  %add.ptr7 = getelementptr inbounds half, half* %pOut.addr.028, i32 2
  %dec = add nsw i32 %blkCnt.027, -1
  %cmp = icmp eq i32 %dec, 0
  br i1 %cmp, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ret void
}