; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -dce -instcombine -S | FileCheck %s

; Verify that wrap flags on scalar arithmetic are carried over (and not
; invented) when the loop vectorizer widens the instruction to <4 x i32>.

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

; The scalar multiply is 'mul nsw', so the widened multiply must keep nsw.
; CHECK-LABEL: @flags1(
; CHECK: load <4 x i32>
; CHECK: mul nsw <4 x i32>
; CHECK: store <4 x i32>
; CHECK: ret i32
define i32 @flags1(i32 %n, i32* nocapture %A) nounwind uwtable ssp {
  ; The loop is entered only when %n > 9; the induction variable starts at 9.
  %1 = icmp sgt i32 %n, 9
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %3 = load i32, i32* %2, align 4
  ; In-place A[i] *= 3 with the no-signed-wrap flag set.
  %4 = mul nsw i32 %3, 3
  store i32 %4, i32* %2, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the truncated 64-bit induction variable equals %n.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph, %0
  ; The returned value is undef; the checks only look for a 'ret i32'.
  ret i32 undef
}


; Same loop as @flags1, but the scalar multiply has no wrap flags, so the
; widened multiply must be a plain 'mul' as well.
; CHECK-LABEL: @flags2(
; CHECK: load <4 x i32>
; CHECK: mul <4 x i32>
; CHECK: store <4 x i32>
; CHECK: ret i32
define i32 @flags2(i32 %n, i32* nocapture %A) nounwind uwtable ssp {
  ; The loop is entered only when %n > 9; the induction variable starts at 9.
  %1 = icmp sgt i32 %n, 9
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph:                                           ; preds = %0, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 9, %0 ]
  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %3 = load i32, i32* %2, align 4
  ; In-place A[i] *= 3 with no flags on the multiply.
  %4 = mul i32 %3, 3
  store i32 %4, i32* %2, align 4
  %indvars.iv.next = add i64 %indvars.iv, 1
  ; Exit once the truncated 64-bit induction variable equals %n.
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge:                                      ; preds = %.lr.ph, %0
  ; The returned value is undef; the checks only look for a 'ret i32'.
  ret i32 undef
}

; Make sure we copy fast math flags and use them for the final reduction.
; The scalar loop sums 256 floats with 'fadd fast'. Both the widened fadd in
; the vector body and the horizontal reduction emitted after the loop branch
; are expected to carry the 'fast' flag.
; CHECK-LABEL: @fast_math(
; CHECK: load <4 x float>
; CHECK: fadd fast <4 x float>
; CHECK: br
; CHECK: call fast float @llvm.vector.reduce.fadd.v4f32
define float @fast_math(float* noalias %s) {
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.body ]
  ; Running sum, seeded with 0.0 on entry.
  %q.04 = phi float [ 0.000000e+00, %entry ], [ %add, %for.body ]
  %arrayidx = getelementptr inbounds float, float* %s, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  ; Accumulate s[i] into the sum with fast-math flags.
  %add = fadd fast float %q.04, %0
  %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1
  ; Constant trip count: the loop exits when the incremented index hits 256.
  %exitcond = icmp eq i64 %indvars.iv.next, 256
  br i1 %exitcond, label %for.end, label %for.body

for.end:
  ; Single-entry phi that carries the final sum out of the loop.
  %add.lcssa = phi float [ %add, %for.body ]
  ret float %add.lcssa
}