; Exercises the loop vectorizer on PowerPC for two CPUs (pwr8 and pwr9) and
; matches its debug output: the selected VF/UF plan and the LV(REG)
; register-usage report for each function below.
; RUN: opt < %s -debug-only=loop-vectorize -passes='function(loop-vectorize),default<O2>' -vectorizer-maximize-bandwidth -mtriple=powerpc64-unknown-linux -S -mcpu=pwr8 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR8
; RUN: opt < %s -debug-only=loop-vectorize -passes='function(loop-vectorize),default<O2>' -vectorizer-maximize-bandwidth -mtriple=powerpc64le-unknown-linux -S -mcpu=pwr9 2>&1 | FileCheck %s --check-prefixes=CHECK,CHECK-PWR9
; The -debug-only output matched by this test is only produced by
; assertion-enabled builds of opt.
; REQUIRES: asserts

; Byte arrays read by @foo and @goo.
@a = global [1024 x i8] zeroinitializer, align 16
@b = global [1024 x i8] zeroinitializer, align 16

; @foo: sum of absolute differences over 1024 bytes.
; Each iteration loads one i8 from @a and one from @b at the same index,
; zero-extends both to i32, takes |a[i] - b[i]| via compare+select, and adds it
; to an i32 accumulator that is returned after the loop.
; Expected plan differs per CPU: VF=16/UF=4 on pwr8, VF=8/UF=8 on pwr9.
define i32 @foo() {
; CHECK-LABEL: foo

; CHECK-PWR8: Executing best plan with VF=16, UF=4

; CHECK-PWR9: Executing best plan with VF=8, UF=8


entry:
  br label %for.body

for.cond.cleanup:
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %indvars.iv
  %0 = load i8, i8* %arrayidx, align 1
  %conv = zext i8 %0 to i32
  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %indvars.iv
  %1 = load i8, i8* %arrayidx2, align 1
  %conv3 = zext i8 %1 to i32
  %sub = sub nsw i32 %conv, %conv3
  ; abs(%sub): keep %sub when it is >= 0, otherwise use its negation.
  %ispos = icmp sgt i32 %sub, -1
  %neg = sub nsw i32 0, %sub
  %2 = select i1 %ispos, i32 %sub, i32 %neg
  %add = add nsw i32 %2, %s.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; @goo: same absolute-difference reduction as a byte loop over @a and @b, but
; the loads use offset indices (%indvars.iv + 3 for @a, %indvars.iv + 2 for @b).
; Both CPUs are expected to pick VF=16, UF=4.
define i32 @goo() {
; For indvars.iv used in a computation chain only feeding into getelementptr or cmp,
; it will not have vector version and the vector register usage will not exceed the
; available vector register number.

; CHECK-LABEL: goo

; CHECK: Executing best plan with VF=16, UF=4

entry:
  br label %for.body

for.cond.cleanup:                                 ; preds = %for.body
  %add.lcssa = phi i32 [ %add, %for.body ]
  ret i32 %add.lcssa

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %s.015 = phi i32 [ 0, %entry ], [ %add, %for.body ]
  ; %tmp1 and %tmp2 are derived from the induction variable and are used only
  ; as getelementptr indices.
  %tmp1 = add nsw i64 %indvars.iv, 3
  %arrayidx = getelementptr inbounds [1024 x i8], [1024 x i8]* @a, i64 0, i64 %tmp1
  %tmp = load i8, i8* %arrayidx, align 1
  %conv = zext i8 %tmp to i32
  %tmp2 = add nsw i64 %indvars.iv, 2
  %arrayidx2 = getelementptr inbounds [1024 x i8], [1024 x i8]* @b, i64 0, i64 %tmp2
  %tmp3 = load i8, i8* %arrayidx2, align 1
  %conv3 = zext i8 %tmp3 to i32
  %sub = sub nsw i32 %conv, %conv3
  ; abs(%sub): keep %sub when it is >= 0, otherwise use its negation.
  %ispos = icmp sgt i32 %sub, -1
  %neg = sub nsw i32 0, %sub
  %tmp4 = select i1 %ispos, i32 %sub, i32 %neg
  %add = add nsw i32 %tmp4, %s.015
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; @bar: in-place i64 update with a running sum.
; For i in [0, 1024): a[i] = a[i] + i, and the updated value is also added to an
; i64 accumulator that is returned. Here the induction variable is used as a
; data operand, not only as an address index.
; Both CPUs are expected to pick VF=2, UF=12.
define i64 @bar(i64* nocapture %a) {
; CHECK-LABEL: bar

; CHECK: Executing best plan with VF=2, UF=12

entry:
  br label %for.body

for.cond.cleanup:
  %add2.lcssa = phi i64 [ %add2, %for.body ]
  ret i64 %add2.lcssa

for.body:
  %i.012 = phi i64 [ 0, %entry ], [ %inc, %for.body ]
  %s.011 = phi i64 [ 0, %entry ], [ %add2, %for.body ]
  %arrayidx = getelementptr inbounds i64, i64* %a, i64 %i.012
  %0 = load i64, i64* %arrayidx, align 8
  %add = add nsw i64 %0, %i.012
  store i64 %add, i64* %arrayidx, align 8
  %add2 = add nsw i64 %add, %s.011
  %inc = add nuw nsw i64 %i.012, 1
  %exitcond = icmp eq i64 %inc, 1024
  br i1 %exitcond, label %for.cond.cleanup, label %for.body
}

; External arrays used by @hoo: @d supplies i64 indices, @e is read through
; those indices, and @c receives the loaded values.
@d = external global [0 x i64], align 8
@e = external global [0 x i32], align 4
@c = external global [0 x i32], align 4

; @hoo: indirect (gather-style) copy, c[i] = e[d[i]] for i in [0, 10000).
; The index into @e is itself loaded from @d, so the @e access is not a
; consecutive access. The %n parameter is not used by the body.
; Both CPUs are expected to stay scalar (VF=1) with UF=12.
define void @hoo(i32 %n) {
; CHECK-LABEL: hoo
; CHECK: Executing best plan with VF=1, UF=12

entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  %arrayidx = getelementptr inbounds [0 x i64], [0 x i64]* @d, i64 0, i64 %indvars.iv
  %tmp = load i64, i64* %arrayidx, align 8
  ; %tmp (loaded from @d) is the index used to address @e.
  %arrayidx1 = getelementptr inbounds [0 x i32], [0 x i32]* @e, i64 0, i64 %tmp
  %tmp1 = load i32, i32* %arrayidx1, align 4
  %arrayidx3 = getelementptr inbounds [0 x i32], [0 x i32]* @c, i64 0, i64 %indvars.iv
  store i32 %tmp1, i32* %arrayidx3, align 4
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  %exitcond = icmp eq i64 %indvars.iv.next, 10000
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret void
}

; @float_: fast-math float reduction s += a[i] + b[i], with the index advancing
; by 32 each iteration. The loop is skipped (returning 0.0) when %n <= 0 and
; otherwise runs while the next index is < sext(%n).
; Matches the LV(REG) report at VF=1 on both CPUs: 2 GPRRC and 3 VSXRC
; registers at peak, plus 1 loop-invariant GPRRC register.
define float @float_(float* nocapture readonly %a, float* nocapture readonly %b, i32 %n) {
;CHECK-LABEL: float_
;CHECK: LV(REG): VF = 1
;CHECK: LV(REG): Found max usage: 2 item
;CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 3 registers
;CHECK: LV(REG): Found invariant usage: 1 item
;CHECK-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers

entry:
  %cmp = icmp sgt i32 %n, 0
  br i1 %cmp, label %preheader, label %for.end

preheader:
  ; %t0 is the loop bound; it is computed once, outside the loop.
  %t0 = sext i32 %n to i64
  br label %for

for:
  %indvars.iv = phi i64 [ 0, %preheader ], [ %indvars.iv.next, %for ]
  %s.02 = phi float [ 0.0, %preheader ], [ %add4, %for ]
  %arrayidx = getelementptr inbounds float, float* %a, i64 %indvars.iv
  %t1 = load float, float* %arrayidx, align 4
  %arrayidx3 = getelementptr inbounds float, float* %b, i64 %indvars.iv
  %t2 = load float, float* %arrayidx3, align 4
  %add = fadd fast float %t1, %s.02
  %add4 = fadd fast float %add, %t2
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 32
  %cmp1 = icmp slt i64 %indvars.iv.next, %t0
  br i1 %cmp1, label %for, label %loopexit

loopexit:
  %add4.lcssa = phi float [ %add4, %for ]
  br label %for.end

for.end:
  %s.0.lcssa = phi float [ 0.0, %entry ], [ %add4.lcssa, %loopexit ]
  ret float %s.0.lcssa
}


; @double_: count-down loop that rewrites A[i] in place through a long chain of
; double-precision add/sub/mul/div operations on the loaded value.
; The index starts at sext(%n) and decreases by 1; the loop exits when the
; 32-bit truncation of the current (pre-decrement) index is 0.
; Basic blocks are unnamed: the entry block is %0 and the loop block is %2.
; The LV(REG) report is matched at VF=2 on pwr8 and at VF=1 on pwr9; the
; invariant register is in VSXRC for the former and GPRRC for the latter.
define void @double_(double* nocapture %A, i32 %n) nounwind uwtable ssp {
;CHECK-LABEL: double_
;CHECK-PWR8: LV(REG): VF = 2
;CHECK-PWR8: LV(REG): Found max usage: 2 item
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
;CHECK-PWR8: LV(REG): Found invariant usage: 1 item
;CHECK-PWR8-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 1 registers

;CHECK-PWR9: LV(REG): VF = 1
;CHECK-PWR9: LV(REG): Found max usage: 2 item
;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::VSXRC, 5 registers
;CHECK-PWR9: LV(REG): Found invariant usage: 1 item
;CHECK-PWR9-NEXT: LV(REG): RegisterClass: PPC::GPRRC, 1 registers

  %1 = sext i32 %n to i64
  br label %2

; <label>:2                                       ; preds = %2, %0
  %indvars.iv = phi i64 [ %indvars.iv.next, %2 ], [ %1, %0 ]
  %3 = getelementptr inbounds double, double* %A, i64 %indvars.iv
  %4 = load double, double* %3, align 8
  %5 = fadd double %4, 3.000000e+00
  %6 = fmul double %4, 2.000000e+00
  %7 = fadd double %5, %6
  %8 = fadd double %7, 2.000000e+00
  %9 = fmul double %8, 5.000000e-01
  %10 = fadd double %6, %9
  %11 = fsub double %10, %5
  %12 = fadd double %4, %11
  %13 = fdiv double %8, %12
  %14 = fmul double %13, %8
  %15 = fmul double %6, %14
  %16 = fmul double %5, %15
  %17 = fadd double %16, -3.000000e+00
  %18 = fsub double %4, %5
  %19 = fadd double %6, %18
  %20 = fadd double %13, %19
  %21 = fadd double %20, %17
  %22 = fadd double %21, 3.000000e+00
  %23 = fmul double %4, %22
  store double %23, double* %3, align 8
  %indvars.iv.next = add i64 %indvars.iv, -1
  ; The exit test uses the current index, not the decremented one.
  %24 = trunc i64 %indvars.iv to i32
  %25 = icmp eq i32 %24, 0
  br i1 %25, label %26, label %2

; <label>:26                                      ; preds = %2
  ret void
}

; @fp128_: fast-math ppc_fp128 reduction. Starting from %d, each of the 2048
; iterations subtracts n[i] from the running value; the final value is returned.
; Matches the LV(REG) report at VF=1 on both CPUs: 2 GPRRC and 2 VRRC registers.
define ppc_fp128 @fp128_(ppc_fp128* nocapture %n, ppc_fp128 %d) nounwind readonly {
;CHECK-LABEL: fp128_
;CHECK: LV(REG): VF = 1
;CHECK: LV(REG): Found max usage: 2 item
;CHECK: LV(REG): RegisterClass: PPC::GPRRC, 2 registers
;CHECK: LV(REG): RegisterClass: PPC::VRRC, 2 registers
entry:
  br label %for.body

for.body:                                         ; preds = %for.body, %entry
  %i.06 = phi i32 [ 0, %entry ], [ %inc, %for.body ]
  %x.05 = phi ppc_fp128 [ %d, %entry ], [ %sub, %for.body ]
  %arrayidx = getelementptr inbounds ppc_fp128, ppc_fp128* %n, i32 %i.06
  %0 = load ppc_fp128, ppc_fp128* %arrayidx, align 8
  %sub = fsub fast ppc_fp128 %x.05, %0
  %inc = add nsw i32 %i.06, 1
  %exitcond = icmp eq i32 %inc, 2048
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body
  ret ppc_fp128 %sub
}


; @fp16_: scales half-precision values from %pIn into %pOut.
; The scale factor is the low 16 bits of %scale.coerce reinterpreted as half.
; The loop runs (%numCols * %numRows) >> 2 times (skipped when that is 0); each
; iteration multiplies two consecutive halves by the scale, stores them, and
; advances both pointers by two elements.
; Matches the LV(REG) report at VF=1 on both CPUs: 4 GPRRC and 2 VSXRC registers.
; NOTE(review): attribute group #0 is referenced but not defined in this file.
define void @fp16_(half* nocapture readonly %pIn, half* nocapture %pOut, i32 %numRows, i32 %numCols, i32 %scale.coerce) #0 {
;CHECK-LABEL: fp16_
;CHECK: LV(REG): VF = 1
;CHECK: LV(REG): Found max usage: 2 item
;CHECK: LV(REG): RegisterClass: PPC::GPRRC, 4 registers
;CHECK: LV(REG): RegisterClass: PPC::VSXRC, 2 registers
entry:
  %tmp.0.extract.trunc = trunc i32 %scale.coerce to i16
  %0 = bitcast i16 %tmp.0.extract.trunc to half
  %mul = mul i32 %numCols, %numRows
  %shr = lshr i32 %mul, 2
  %cmp26 = icmp eq i32 %shr, 0
  br i1 %cmp26, label %while.end, label %while.body

while.body:                                       ; preds = %entry, %while.body
  ; Pointer inductions for input/output plus a down-counting block counter.
  %pIn.addr.029 = phi half* [ %add.ptr, %while.body ], [ %pIn, %entry ]
  %pOut.addr.028 = phi half* [ %add.ptr7, %while.body ], [ %pOut, %entry ]
  %blkCnt.027 = phi i32 [ %dec, %while.body ], [ %shr, %entry ]
  %1 = load half, half* %pIn.addr.029, align 2
  %arrayidx2 = getelementptr inbounds half, half* %pIn.addr.029, i32 1
  %2 = load half, half* %arrayidx2, align 2
  %mul3 = fmul half %1, %0
  %mul4 = fmul half %2, %0
  store half %mul3, half* %pOut.addr.028, align 2
  %arrayidx6 = getelementptr inbounds half, half* %pOut.addr.028, i32 1
  store half %mul4, half* %arrayidx6, align 2
  %add.ptr = getelementptr inbounds half, half* %pIn.addr.029, i32 2
  %add.ptr7 = getelementptr inbounds half, half* %pOut.addr.028, i32 2
  %dec = add nsw i32 %blkCnt.027, -1
  %cmp = icmp eq i32 %dec, 0
  br i1 %cmp, label %while.end, label %while.body

while.end:                                        ; preds = %while.body, %entry
  ret void
}