; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -force-reduction-intrinsics -dce -instcombine -S | FileCheck %s

target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

define i32 @reduction_sum(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
; CHECK-LABEL: @reduction_sum(
; CHECK-NEXT:    [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]]
; CHECK:       .lr.ph.preheader:
; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[N]], -1
; CHECK-NEXT:    [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT:    [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4
; CHECK-NEXT:    [[TMP9:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND2]]
; CHECK-NEXT:    [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP11]] = add <4 x i32> [[TMP10]], [[WIDE_LOAD1]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT:    [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT:    [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !0
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP11]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
; CHECK-NEXT:    br label [[DOTLR_PH:%.*]]
; CHECK:       .lr.ph:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[SUM_02:%.*]] = phi i32 [ [[TMP21:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] 50; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 51; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[INDVARS_IV]] to i32 52; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[SUM_02]], [[TMP18]] 53; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], [[TMP15]] 54; CHECK-NEXT: [[TMP21]] = add i32 [[TMP20]], [[TMP17]] 55; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 56; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 57; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 58; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !2 59; CHECK: ._crit_edge.loopexit: 60; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] 61; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] 62; CHECK: ._crit_edge: 63; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] 64; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] 65; 66 %1 = icmp sgt i32 %n, 0 67 br i1 %1, label %.lr.ph, label %._crit_edge 68 69.lr.ph: ; preds = %0, %.lr.ph 70 %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] 71 %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] 72 %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 73 %3 = load i32, i32* %2, align 4 74 %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv 75 %5 = load i32, i32* %4, align 4 76 %6 = trunc i64 %indvars.iv to i32 77 %7 = add i32 %sum.02, %6 78 %8 = add i32 %7, %3 79 %9 = add i32 %8, %5 80 %indvars.iv.next = add i64 %indvars.iv, 1 81 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 82 %exitcond = icmp eq i32 %lftr.wideiv, %n 83 br i1 %exitcond, label %._crit_edge, label %.lr.ph 84 85._crit_edge: ; preds = %.lr.ph, %0 86 %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] 87 ret i32 %sum.0.lcssa 88} 89 90define i32 @reduction_prod(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { 91; CHECK-LABEL: @reduction_prod( 92; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 93; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] 94; CHECK: .lr.ph.preheader: 95; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 96; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 97; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 98; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3 99; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 100; CHECK: vector.ph: 101; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588 102; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 103; CHECK: vector.body: 104; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 105; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] 106; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] 107; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 108; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* 109; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 110; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 111; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* 112; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = 
load <4 x i32>, <4 x i32>* [[TMP8]], align 4 113; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[VEC_PHI]], [[VEC_IND2]] 114; CHECK-NEXT: [[TMP10:%.*]] = mul <4 x i32> [[TMP9]], [[WIDE_LOAD]] 115; CHECK-NEXT: [[TMP11]] = mul <4 x i32> [[TMP10]], [[WIDE_LOAD1]] 116; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 117; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4> 118; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 119; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !4 120; CHECK: middle.block: 121; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP11]]) 122; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] 123; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] 124; CHECK: scalar.ph: 125; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] 126; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 1, [[DOTLR_PH_PREHEADER]] ] 127; CHECK-NEXT: br label [[DOTLR_PH:%.*]] 128; CHECK: .lr.ph: 129; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 130; CHECK-NEXT: [[PROD_02:%.*]] = phi i32 [ [[TMP21:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 131; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] 132; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 133; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] 134; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 135; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[INDVARS_IV]] to i32 136; CHECK-NEXT: [[TMP19:%.*]] = mul i32 [[PROD_02]], [[TMP18]] 137; CHECK-NEXT: [[TMP20:%.*]] = mul i32 [[TMP19]], [[TMP15]] 138; CHECK-NEXT: [[TMP21]] = mul i32 [[TMP20]], [[TMP17]] 139; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 140; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 141; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 142; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !5 143; CHECK: ._crit_edge.loopexit: 144; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] 145; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] 146; CHECK: ._crit_edge: 147; CHECK-NEXT: [[PROD_0_LCSSA:%.*]] = phi i32 [ 1, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] 148; CHECK-NEXT: ret i32 [[PROD_0_LCSSA]] 149; 150 %1 = icmp sgt i32 %n, 0 151 br i1 %1, label %.lr.ph, label %._crit_edge 152 153.lr.ph: ; preds = %0, %.lr.ph 154 %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] 155 %prod.02 = phi i32 [ %9, %.lr.ph ], [ 1, %0 ] 156 %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 157 %3 = load i32, i32* %2, align 4 158 %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv 159 %5 = load i32, i32* %4, align 4 160 %6 = trunc i64 %indvars.iv to i32 161 %7 = mul i32 %prod.02, %6 162 %8 = mul i32 %7, %3 163 %9 = mul i32 %8, %5 164 %indvars.iv.next = add i64 %indvars.iv, 1 165 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 166 %exitcond = icmp eq i32 %lftr.wideiv, %n 167 br i1 %exitcond, label %._crit_edge, label %.lr.ph 168 169._crit_edge: ; preds = %.lr.ph, %0 170 %prod.0.lcssa = phi i32 [ 1, %0 ], [ %9, %.lr.ph ] 171 ret i32 %prod.0.lcssa 172} 173 174define i32 
@reduction_mix(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { 175; CHECK-LABEL: @reduction_mix( 176; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 177; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] 178; CHECK: .lr.ph.preheader: 179; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 180; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 181; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 182; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3 183; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 184; CHECK: vector.ph: 185; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588 186; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 187; CHECK: vector.body: 188; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 189; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ] 190; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ] 191; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 192; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* 193; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 194; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 195; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* 196; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4 197; CHECK-NEXT: [[TMP9:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] 198; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND2]] 199; CHECK-NEXT: [[TMP11]] = add <4 x i32> [[TMP10]], [[TMP9]] 200; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 201; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4> 202; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 203; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !6 204; CHECK: middle.block: 205; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP11]]) 206; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] 207; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] 208; CHECK: scalar.ph: 209; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] 210; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] 211; CHECK-NEXT: br label [[DOTLR_PH:%.*]] 212; CHECK: .lr.ph: 213; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 214; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP21:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 215; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] 216; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4 217; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] 218; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 219; CHECK-NEXT: [[TMP18:%.*]] = mul nsw i32 [[TMP17]], [[TMP15]] 220; CHECK-NEXT: [[TMP19:%.*]] = trunc i64 [[INDVARS_IV]] to i32 221; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[SUM_02]], [[TMP19]] 222; 
CHECK-NEXT: [[TMP21]] = add i32 [[TMP20]], [[TMP18]] 223; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 224; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 225; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 226; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !7 227; CHECK: ._crit_edge.loopexit: 228; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ] 229; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] 230; CHECK: ._crit_edge: 231; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] 232; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] 233; 234 %1 = icmp sgt i32 %n, 0 235 br i1 %1, label %.lr.ph, label %._crit_edge 236 237.lr.ph: ; preds = %0, %.lr.ph 238 %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] 239 %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ] 240 %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 241 %3 = load i32, i32* %2, align 4 242 %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv 243 %5 = load i32, i32* %4, align 4 244 %6 = mul nsw i32 %5, %3 245 %7 = trunc i64 %indvars.iv to i32 246 %8 = add i32 %sum.02, %7 247 %9 = add i32 %8, %6 248 %indvars.iv.next = add i64 %indvars.iv, 1 249 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 250 %exitcond = icmp eq i32 %lftr.wideiv, %n 251 br i1 %exitcond, label %._crit_edge, label %.lr.ph 252 253._crit_edge: ; preds = %.lr.ph, %0 254 %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ] 255 ret i32 %sum.0.lcssa 256} 257 258define i32 @reduction_mul(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp { 259; CHECK-LABEL: @reduction_mul( 260; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0 261; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]] 262; CHECK: .lr.ph.preheader: 263; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1 264; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64 265; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1 266; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3 267; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 268; CHECK: vector.ph: 269; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588 270; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 271; CHECK: vector.body: 272; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 273; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 19, i32 1, i32 1, i32 1>, [[VECTOR_PH]] ], [ [[TMP10:%.*]], [[VECTOR_BODY]] ] 274; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 275; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* 276; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 277; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 278; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>* 279; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4 280; CHECK-NEXT: [[TMP9:%.*]] = mul <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] 281; CHECK-NEXT: [[TMP10]] = mul <4 x i32> [[TMP9]], [[WIDE_LOAD1]] 282; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 283; CHECK-NEXT: [[TMP11:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 284; CHECK-NEXT: br i1 [[TMP11]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !8 285; CHECK: middle.block: 
286; CHECK-NEXT: [[TMP12:%.*]] = call i32 @llvm.experimental.vector.reduce.mul.v4i32(<4 x i32> [[TMP10]]) 287; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]] 288; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]] 289; CHECK: scalar.ph: 290; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ] 291; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP12]], [[MIDDLE_BLOCK]] ], [ 19, [[DOTLR_PH_PREHEADER]] ] 292; CHECK-NEXT: br label [[DOTLR_PH:%.*]] 293; CHECK: .lr.ph: 294; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 295; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP18:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 296; CHECK-NEXT: [[TMP13:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] 297; CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 298; CHECK-NEXT: [[TMP15:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] 299; CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 300; CHECK-NEXT: [[TMP17:%.*]] = mul i32 [[SUM_02]], [[TMP14]] 301; CHECK-NEXT: [[TMP18]] = mul i32 [[TMP17]], [[TMP16]] 302; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 303; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 304; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 305; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !9 306; CHECK: ._crit_edge.loopexit: 307; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ [[TMP18]], [[DOTLR_PH]] ], [ [[TMP12]], [[MIDDLE_BLOCK]] ] 308; CHECK-NEXT: br label [[DOT_CRIT_EDGE]] 309; CHECK: ._crit_edge: 310; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ] 311; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] 312; 313 %1 = icmp sgt i32 %n, 0 314 br i1 %1, label %.lr.ph, label %._crit_edge 315 316.lr.ph: ; preds = %0, %.lr.ph 317 %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ] 318 %sum.02 = phi i32 [ %7, %.lr.ph ], [ 19, %0 ] 319 %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 320 %3 = load i32, i32* %2, align 4 321 %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv 322 %5 = load i32, i32* %4, align 4 323 %6 = mul i32 %sum.02, %3 324 %7 = mul i32 %6, %5 325 %indvars.iv.next = add i64 %indvars.iv, 1 326 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 327 %exitcond = icmp eq i32 %lftr.wideiv, %n 328 br i1 %exitcond, label %._crit_edge, label %.lr.ph 329 330._crit_edge: ; preds = %.lr.ph, %0 331 %sum.0.lcssa = phi i32 [ 0, %0 ], [ %7, %.lr.ph ] 332 ret i32 %sum.0.lcssa 333} 334 335define i32 @start_at_non_zero(i32* nocapture %in, i32* nocapture %coeff, i32* nocapture %out, i32 %n) nounwind uwtable readonly ssp { 336; CHECK-LABEL: @start_at_non_zero( 337; CHECK-NEXT: entry: 338; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 339; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 340; CHECK: for.body.preheader: 341; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 342; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 343; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 344; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 345; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 346; CHECK: vector.ph: 347; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 348; CHECK-NEXT: br 
label [[VECTOR_BODY:%.*]] 349; CHECK: vector.body: 350; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 351; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 120, i32 0, i32 0, i32 0>, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] 352; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[IN:%.*]], i64 [[INDEX]] 353; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 354; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 355; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[COEFF:%.*]], i64 [[INDEX]] 356; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* 357; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 358; CHECK-NEXT: [[TMP7:%.*]] = mul nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] 359; CHECK-NEXT: [[TMP8]] = add <4 x i32> [[TMP7]], [[VEC_PHI]] 360; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 361; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 362; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !10 363; CHECK: middle.block: 364; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP8]]) 365; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 366; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 367; CHECK: scalar.ph: 368; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 369; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 120, [[FOR_BODY_PREHEADER]] ] 370; CHECK-NEXT: br label [[FOR_BODY:%.*]] 371; CHECK: for.body: 372; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 373; CHECK-NEXT: [[SUM_09:%.*]] = phi i32 [ [[ADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 374; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[IN]], i64 [[INDVARS_IV]] 375; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 376; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[COEFF]], i64 [[INDVARS_IV]] 377; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 378; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], [[TMP11]] 379; CHECK-NEXT: [[ADD]] = add nsw i32 [[MUL]], [[SUM_09]] 380; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 381; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 382; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 383; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !11 384; CHECK: for.end.loopexit: 385; CHECK-NEXT: [[ADD_LCSSA:%.*]] = phi i32 [ [[ADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] 386; CHECK-NEXT: br label [[FOR_END]] 387; CHECK: for.end: 388; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 120, [[ENTRY:%.*]] ], [ [[ADD_LCSSA]], [[FOR_END_LOOPEXIT]] ] 389; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]] 390; 391entry: 392 %cmp7 = icmp sgt i32 %n, 0 393 br i1 %cmp7, label %for.body, label %for.end 394 395for.body: ; preds = %entry, %for.body 396 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 397 %sum.09 = phi i32 [ %add, %for.body ], [ 120, %entry ] 398 %arrayidx = getelementptr inbounds i32, i32* %in, i64 %indvars.iv 399 %0 = load i32, i32* %arrayidx, align 4 400 %arrayidx2 = getelementptr inbounds i32, i32* %coeff, i64 %indvars.iv 401 %1 
= load i32, i32* %arrayidx2, align 4 402 %mul = mul nsw i32 %1, %0 403 %add = add nsw i32 %mul, %sum.09 404 %indvars.iv.next = add i64 %indvars.iv, 1 405 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 406 %exitcond = icmp eq i32 %lftr.wideiv, %n 407 br i1 %exitcond, label %for.end, label %for.body 408 409for.end: ; preds = %for.body, %entry 410 %sum.0.lcssa = phi i32 [ 120, %entry ], [ %add, %for.body ] 411 ret i32 %sum.0.lcssa 412} 413 414define i32 @reduction_and(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { 415; CHECK-LABEL: @reduction_and( 416; CHECK-NEXT: entry: 417; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 418; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 419; CHECK: for.body.preheader: 420; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 421; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 422; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 423; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 424; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 425; CHECK: vector.ph: 426; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 427; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 428; CHECK: vector.body: 429; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 430; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 -1, i32 -1, i32 -1, i32 -1>, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] 431; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 432; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 433; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 434; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 435; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* 436; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 437; CHECK-NEXT: [[TMP7:%.*]] = and <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] 438; CHECK-NEXT: [[TMP8]] = and <4 x i32> [[TMP7]], [[WIDE_LOAD1]] 439; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 440; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 441; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !12 442; CHECK: middle.block: 443; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.and.v4i32(<4 x i32> [[TMP8]]) 444; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 445; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 446; CHECK: scalar.ph: 447; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 448; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ -1, [[FOR_BODY_PREHEADER]] ] 449; CHECK-NEXT: br label [[FOR_BODY:%.*]] 450; CHECK: for.body: 451; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 452; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[AND:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 453; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] 454; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 455; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] 456; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 457; CHECK-NEXT: [[ADD:%.*]] = and i32 
[[RESULT_08]], [[TMP11]] 458; CHECK-NEXT: [[AND]] = and i32 [[ADD]], [[TMP12]] 459; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 460; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 461; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 462; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !13 463; CHECK: for.end.loopexit: 464; CHECK-NEXT: [[AND_LCSSA:%.*]] = phi i32 [ [[AND]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] 465; CHECK-NEXT: br label [[FOR_END]] 466; CHECK: for.end: 467; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ -1, [[ENTRY:%.*]] ], [ [[AND_LCSSA]], [[FOR_END_LOOPEXIT]] ] 468; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] 469; 470entry: 471 %cmp7 = icmp sgt i32 %n, 0 472 br i1 %cmp7, label %for.body, label %for.end 473 474for.body: ; preds = %entry, %for.body 475 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 476 %result.08 = phi i32 [ %and, %for.body ], [ -1, %entry ] 477 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 478 %0 = load i32, i32* %arrayidx, align 4 479 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv 480 %1 = load i32, i32* %arrayidx2, align 4 481 %add = and i32 %result.08, %0 482 %and = and i32 %add, %1 483 %indvars.iv.next = add i64 %indvars.iv, 1 484 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 485 %exitcond = icmp eq i32 %lftr.wideiv, %n 486 br i1 %exitcond, label %for.end, label %for.body 487 488for.end: ; preds = %for.body, %entry 489 %result.0.lcssa = phi i32 [ -1, %entry ], [ %and, %for.body ] 490 ret i32 %result.0.lcssa 491} 492 493define i32 @reduction_or(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { 494; CHECK-LABEL: @reduction_or( 495; CHECK-NEXT: entry: 496; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 497; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 498; CHECK: for.body.preheader: 499; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 500; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 501; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 502; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 503; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 504; CHECK: vector.ph: 505; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 506; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 507; CHECK: vector.body: 508; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 509; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] 510; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 511; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 512; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 513; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 514; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* 515; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 516; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] 517; CHECK-NEXT: [[TMP8]] = or <4 x i32> [[TMP7]], [[VEC_PHI]] 518; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 519; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 520; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !14 521; CHECK: 
middle.block: 522; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.or.v4i32(<4 x i32> [[TMP8]]) 523; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 524; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 525; CHECK: scalar.ph: 526; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 527; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 528; CHECK-NEXT: br label [[FOR_BODY:%.*]] 529; CHECK: for.body: 530; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 531; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[OR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 532; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] 533; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 534; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] 535; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 536; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] 537; CHECK-NEXT: [[OR]] = or i32 [[ADD]], [[RESULT_08]] 538; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 539; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 540; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 541; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !15 542; CHECK: for.end.loopexit: 543; CHECK-NEXT: [[OR_LCSSA:%.*]] = phi i32 [ [[OR]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] 544; CHECK-NEXT: br label [[FOR_END]] 545; CHECK: for.end: 546; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[OR_LCSSA]], [[FOR_END_LOOPEXIT]] ] 547; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] 548; 549entry: 550 %cmp7 = icmp sgt i32 %n, 0 551 br i1 %cmp7, label %for.body, label %for.end 552 553for.body: ; preds = %entry, %for.body 554 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 555 %result.08 = phi i32 [ %or, %for.body ], [ 0, %entry ] 556 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 557 %0 = load i32, i32* %arrayidx, align 4 558 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv 559 %1 = load i32, i32* %arrayidx2, align 4 560 %add = add nsw i32 %1, %0 561 %or = or i32 %add, %result.08 562 %indvars.iv.next = add i64 %indvars.iv, 1 563 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 564 %exitcond = icmp eq i32 %lftr.wideiv, %n 565 br i1 %exitcond, label %for.end, label %for.body 566 567for.end: ; preds = %for.body, %entry 568 %result.0.lcssa = phi i32 [ 0, %entry ], [ %or, %for.body ] 569 ret i32 %result.0.lcssa 570} 571 572define i32 @reduction_xor(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { 573; CHECK-LABEL: @reduction_xor( 574; CHECK-NEXT: entry: 575; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 576; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 577; CHECK: for.body.preheader: 578; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 579; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 580; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 581; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 582; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 583; CHECK: vector.ph: 584; CHECK-NEXT: [[N_VEC:%.*]] = 
and i64 [[TMP2]], 8589934588 585; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 586; CHECK: vector.body: 587; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 588; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] 589; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 590; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 591; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 592; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 593; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>* 594; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4 595; CHECK-NEXT: [[TMP7:%.*]] = add nsw <4 x i32> [[WIDE_LOAD1]], [[WIDE_LOAD]] 596; CHECK-NEXT: [[TMP8]] = xor <4 x i32> [[TMP7]], [[VEC_PHI]] 597; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 598; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 599; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !16 600; CHECK: middle.block: 601; CHECK-NEXT: [[TMP10:%.*]] = call i32 @llvm.experimental.vector.reduce.xor.v4i32(<4 x i32> [[TMP8]]) 602; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 603; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 604; CHECK: scalar.ph: 605; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 606; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 607; CHECK-NEXT: br label [[FOR_BODY:%.*]] 608; CHECK: for.body: 609; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 610; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[XOR:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 611; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] 612; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 613; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]] 614; CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 615; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP11]] 616; CHECK-NEXT: [[XOR]] = xor i32 [[ADD]], [[RESULT_08]] 617; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 618; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 619; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 620; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !17 621; CHECK: for.end.loopexit: 622; CHECK-NEXT: [[XOR_LCSSA:%.*]] = phi i32 [ [[XOR]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] 623; CHECK-NEXT: br label [[FOR_END]] 624; CHECK: for.end: 625; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[XOR_LCSSA]], [[FOR_END_LOOPEXIT]] ] 626; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] 627; 628entry: 629 %cmp7 = icmp sgt i32 %n, 0 630 br i1 %cmp7, label %for.body, label %for.end 631 632for.body: ; preds = %entry, %for.body 633 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 634 %result.08 = phi i32 [ %xor, %for.body ], [ 0, %entry ] 635 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv 636 %0 = load i32, i32* %arrayidx, align 4 637 %arrayidx2 = getelementptr inbounds i32, i32* 
%B, i64 %indvars.iv 638 %1 = load i32, i32* %arrayidx2, align 4 639 %add = add nsw i32 %1, %0 640 %xor = xor i32 %add, %result.08 641 %indvars.iv.next = add i64 %indvars.iv, 1 642 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 643 %exitcond = icmp eq i32 %lftr.wideiv, %n 644 br i1 %exitcond, label %for.end, label %for.body 645 646for.end: ; preds = %for.body, %entry 647 %result.0.lcssa = phi i32 [ 0, %entry ], [ %xor, %for.body ] 648 ret i32 %result.0.lcssa 649} 650 651define float @reduction_fadd(i32 %n, float* nocapture %A, float* nocapture %B) nounwind uwtable readonly { 652; CHECK-LABEL: @reduction_fadd( 653; CHECK-NEXT: entry: 654; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 655; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 656; CHECK: for.body.preheader: 657; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 658; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 659; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 660; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 661; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 662; CHECK: vector.ph: 663; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 664; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 665; CHECK: vector.body: 666; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 667; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] 668; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 669; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>* 670; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4 671; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] 672; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>* 673; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4 674; CHECK-NEXT: [[TMP7:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[WIDE_LOAD]] 675; CHECK-NEXT: [[TMP8]] = fadd fast <4 x float> [[TMP7]], [[WIDE_LOAD1]] 676; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 677; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 678; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !18 679; CHECK: middle.block: 680; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[TMP8]]) 681; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 682; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 683; CHECK: scalar.ph: 684; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 685; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ] 686; CHECK-NEXT: br label [[FOR_BODY:%.*]] 687; CHECK: for.body: 688; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 689; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[FADD:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 690; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 691; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX]], align 4 692; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[B]], 
i64 [[INDVARS_IV]] 693; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX2]], align 4 694; CHECK-NEXT: [[ADD:%.*]] = fadd fast float [[RESULT_08]], [[TMP11]] 695; CHECK-NEXT: [[FADD]] = fadd fast float [[ADD]], [[TMP12]] 696; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 697; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 698; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 699; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !19 700; CHECK: for.end.loopexit: 701; CHECK-NEXT: [[FADD_LCSSA:%.*]] = phi float [ [[FADD]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] 702; CHECK-NEXT: br label [[FOR_END]] 703; CHECK: for.end: 704; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[FADD_LCSSA]], [[FOR_END_LOOPEXIT]] ] 705; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] 706; 707entry: 708 %cmp7 = icmp sgt i32 %n, 0 709 br i1 %cmp7, label %for.body, label %for.end 710 711for.body: ; preds = %entry, %for.body 712 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 713 %result.08 = phi float [ %fadd, %for.body ], [ 0.0, %entry ] 714 %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 715 %0 = load float, float* %arrayidx, align 4 716 %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv 717 %1 = load float, float* %arrayidx2, align 4 718 %add = fadd fast float %result.08, %0 719 %fadd = fadd fast float %add, %1 720 %indvars.iv.next = add i64 %indvars.iv, 1 721 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 722 %exitcond = icmp eq i32 %lftr.wideiv, %n 723 br i1 %exitcond, label %for.end, label %for.body 724 725for.end: ; preds = %for.body, %entry 726 %result.0.lcssa = phi float [ 0.0, %entry ], [ %fadd, %for.body ] 727 ret float %result.0.lcssa 728} 729 730define float @reduction_fmul(i32 %n, float* nocapture %A, float* nocapture %B) nounwind uwtable readonly { 731; CHECK-LABEL: @reduction_fmul( 732; CHECK-NEXT: entry: 733; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 734; CHECK-NEXT: br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 735; CHECK: for.body.preheader: 736; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 737; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 738; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 739; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 740; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 741; CHECK: vector.ph: 742; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 743; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 744; CHECK: vector.body: 745; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 746; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ <float 0.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, [[VECTOR_PH]] ], [ [[TMP8:%.*]], [[VECTOR_BODY]] ] 747; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 748; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>* 749; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4 750; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]] 751; CHECK-NEXT: [[TMP6:%.*]] = bitcast float* [[TMP5]] to <4 x float>* 752; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP6]], align 4 753; CHECK-NEXT: [[TMP7:%.*]] = fmul fast <4 x float> [[VEC_PHI]], 
[[WIDE_LOAD]] 754; CHECK-NEXT: [[TMP8]] = fmul fast <4 x float> [[TMP7]], [[WIDE_LOAD1]] 755; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 756; CHECK-NEXT: [[TMP9:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 757; CHECK-NEXT: br i1 [[TMP9]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !20 758; CHECK: middle.block: 759; CHECK-NEXT: [[TMP10:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fmul.f32.v4f32(float 1.000000e+00, <4 x float> [[TMP8]]) 760; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 761; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 762; CHECK: scalar.ph: 763; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 764; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP10]], [[MIDDLE_BLOCK]] ], [ 0.000000e+00, [[FOR_BODY_PREHEADER]] ] 765; CHECK-NEXT: br label [[FOR_BODY:%.*]] 766; CHECK: for.body: 767; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 768; CHECK-NEXT: [[RESULT_08:%.*]] = phi float [ [[FMUL:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 769; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS_IV]] 770; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[ARRAYIDX]], align 4 771; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[B]], i64 [[INDVARS_IV]] 772; CHECK-NEXT: [[TMP12:%.*]] = load float, float* [[ARRAYIDX2]], align 4 773; CHECK-NEXT: [[ADD:%.*]] = fmul fast float [[RESULT_08]], [[TMP11]] 774; CHECK-NEXT: [[FMUL]] = fmul fast float [[ADD]], [[TMP12]] 775; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 776; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 777; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 778; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !21 779; CHECK: for.end.loopexit: 780; CHECK-NEXT: [[FMUL_LCSSA:%.*]] = phi float [ [[FMUL]], [[FOR_BODY]] ], [ [[TMP10]], [[MIDDLE_BLOCK]] ] 781; CHECK-NEXT: br label [[FOR_END]] 782; CHECK: for.end: 783; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi float [ 0.000000e+00, [[ENTRY:%.*]] ], [ [[FMUL_LCSSA]], [[FOR_END_LOOPEXIT]] ] 784; CHECK-NEXT: ret float [[RESULT_0_LCSSA]] 785; 786entry: 787 %cmp7 = icmp sgt i32 %n, 0 788 br i1 %cmp7, label %for.body, label %for.end 789 790for.body: ; preds = %entry, %for.body 791 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 792 %result.08 = phi float [ %fmul, %for.body ], [ 0.0, %entry ] 793 %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv 794 %0 = load float, float* %arrayidx, align 4 795 %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv 796 %1 = load float, float* %arrayidx2, align 4 797 %add = fmul fast float %result.08, %0 798 %fmul = fmul fast float %add, %1 799 %indvars.iv.next = add i64 %indvars.iv, 1 800 %lftr.wideiv = trunc i64 %indvars.iv.next to i32 801 %exitcond = icmp eq i32 %lftr.wideiv, %n 802 br i1 %exitcond, label %for.end, label %for.body 803 804for.end: ; preds = %for.body, %entry 805 %result.0.lcssa = phi float [ 0.0, %entry ], [ %fmul, %for.body ] 806 ret float %result.0.lcssa 807} 808 809define i32 @reduction_min(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly { 810; CHECK-LABEL: @reduction_min( 811; CHECK-NEXT: entry: 812; CHECK-NEXT: [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0 813; CHECK-NEXT: br i1 
[[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]] 814; CHECK: for.body.preheader: 815; CHECK-NEXT: [[TMP0:%.*]] = add i32 [[N]], -1 816; CHECK-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64 817; CHECK-NEXT: [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1 818; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3 819; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 820; CHECK: vector.ph: 821; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588 822; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 823; CHECK: vector.body: 824; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] 825; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ] 826; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 827; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 828; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 829; CHECK-NEXT: [[TMP5:%.*]] = icmp slt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]] 830; CHECK-NEXT: [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]] 831; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4 832; CHECK-NEXT: [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 833; CHECK-NEXT: br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !22 834; CHECK: middle.block: 835; CHECK-NEXT: [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.smin.v4i32(<4 x i32> [[TMP6]]) 836; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]] 837; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]] 838; CHECK: scalar.ph: 839; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ] 840; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 1000, [[FOR_BODY_PREHEADER]] ] 841; CHECK-NEXT: br label [[FOR_BODY:%.*]] 842; CHECK: for.body: 843; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 844; CHECK-NEXT: [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 845; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]] 846; CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 847; CHECK-NEXT: [[C0:%.*]] = icmp slt i32 [[RESULT_08]], [[TMP9]] 848; CHECK-NEXT: [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[TMP9]] 849; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1 850; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32 851; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]] 852; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !23 853; CHECK: for.end.loopexit: 854; CHECK-NEXT: [[V0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ] 855; CHECK-NEXT: br label [[FOR_END]] 856; CHECK: for.end: 857; CHECK-NEXT: [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[V0_LCSSA]], [[FOR_END_LOOPEXIT]] ] 858; CHECK-NEXT: ret i32 [[RESULT_0_LCSSA]] 859; 860entry: 861 %cmp7 = icmp sgt i32 %n, 0 862 br i1 %cmp7, label %for.body, label %for.end 863 864for.body: ; preds = %entry, %for.body 865 %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ] 866 %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ] 867 
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %c0 = icmp slt i32 %result.08, %0
  %v0 = select i1 %c0, i32 %result.08, i32 %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi i32 [ 0, %entry ], [ %v0, %for.body ]
  ret i32 %result.0.lcssa
}

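; Check that the icmp ugt/select max idiom below is recognized as an unsigned
; max reduction and reduced with @llvm.experimental.vector.reduce.umax.v4i32
; in the middle block.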
define i32 @reduction_max(i32 %n, i32* nocapture %A, i32* nocapture %B) nounwind uwtable readonly {
; CHECK-LABEL: @reduction_max(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[CMP7:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT:    br i1 [[CMP7]], label [[FOR_BODY_PREHEADER:%.*]], label [[FOR_END:%.*]]
; CHECK:       for.body.preheader:
; CHECK-NEXT:    [[TMP0:%.*]] = add i32 [[N]], -1
; CHECK-NEXT:    [[TMP1:%.*]] = zext i32 [[TMP0]] to i64
; CHECK-NEXT:    [[TMP2:%.*]] = add nuw nsw i64 [[TMP1]], 1
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP0]], 3
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK:       vector.ph:
; CHECK-NEXT:    [[N_VEC:%.*]] = and i64 [[TMP2]], 8589934588
; CHECK-NEXT:    br label [[VECTOR_BODY:%.*]]
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[VEC_PHI:%.*]] = phi <4 x i32> [ <i32 1000, i32 1000, i32 1000, i32 1000>, [[VECTOR_PH]] ], [ [[TMP6:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT:    [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>*
; CHECK-NEXT:    [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4
; CHECK-NEXT:    [[TMP5:%.*]] = icmp ugt <4 x i32> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT:    [[TMP6]] = select <4 x i1> [[TMP5]], <4 x i32> [[VEC_PHI]], <4 x i32> [[WIDE_LOAD]]
; CHECK-NEXT:    [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT:    [[TMP7:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[TMP7]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !24
; CHECK:       middle.block:
; CHECK-NEXT:    [[TMP8:%.*]] = call i32 @llvm.experimental.vector.reduce.umax.v4i32(<4 x i32> [[TMP6]])
; CHECK-NEXT:    [[CMP_N:%.*]] = icmp eq i64 [[TMP2]], [[N_VEC]]
; CHECK-NEXT:    br i1 [[CMP_N]], label [[FOR_END_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK:       scalar.ph:
; CHECK-NEXT:    [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP8]], [[MIDDLE_BLOCK]] ], [ 1000, [[FOR_BODY_PREHEADER]] ]
; CHECK-NEXT:    br label [[FOR_BODY:%.*]]
; CHECK:       for.body:
; CHECK-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[RESULT_08:%.*]] = phi i32 [ [[V0:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT:    [[TMP9:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT:    [[C0:%.*]] = icmp ugt i32 [[RESULT_08]], [[TMP9]]
; CHECK-NEXT:    [[V0]] = select i1 [[C0]], i32 [[RESULT_08]], i32 [[TMP9]]
; CHECK-NEXT:    [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT:    [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !25
; CHECK:       for.end.loopexit:
; CHECK-NEXT:    [[V0_LCSSA:%.*]] = phi i32 [ [[V0]], [[FOR_BODY]] ], [ [[TMP8]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT:    br label [[FOR_END]]
; CHECK:       for.end:
; CHECK-NEXT:    [[RESULT_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[V0_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT:    ret i32 [[RESULT_0_LCSSA]]
;
entry:
  %cmp7 = icmp sgt i32 %n, 0
  br i1 %cmp7, label %for.body, label %for.end

for.body:                                         ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %result.08 = phi i32 [ %v0, %for.body ], [ 1000, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %c0 = icmp ugt i32 %result.08, %0
  %v0 = select i1 %c0, i32 %result.08, i32 %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end:                                          ; preds = %for.body, %entry
  %result.0.lcssa = phi i32 [ 0, %entry ], [ %v0, %for.body ]
  ret i32 %result.0.lcssa
}

; For sub we can create a reduction, but not an in-loop reduction.
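; Check that the running subtraction is vectorized as a <4 x i32> sub whose
; result is reduced with @llvm.experimental.vector.reduce.add.v4i32 in the
; middle block.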
; CHECK-NEXT: [[X_05:%.*]] = phi i32 [ [[SUB:%.*]], [[FOR_BODY]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[SUB]] = sub nsw i32 [[X_05]], [[TMP8]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[FOR_END_LOOPEXIT]], label [[FOR_BODY]], !llvm.loop !27
; CHECK: for.end.loopexit:
; CHECK-NEXT: [[SUB_LCSSA:%.*]] = phi i32 [ [[SUB]], [[FOR_BODY]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_END]]
; CHECK: for.end:
; CHECK-NEXT: [[X_0_LCSSA:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[SUB_LCSSA]], [[FOR_END_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[X_0_LCSSA]]
;
entry:
  %cmp4 = icmp sgt i32 %n, 0
  br i1 %cmp4, label %for.body, label %for.end

for.body: ; preds = %entry, %for.body
  %indvars.iv = phi i64 [ %indvars.iv.next, %for.body ], [ 0, %entry ]
  %x.05 = phi i32 [ %sub, %for.body ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %0 = load i32, i32* %arrayidx, align 4
  %sub = sub nsw i32 %x.05, %0
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %for.end, label %for.body

for.end: ; preds = %for.body, %entry
  %x.0.lcssa = phi i32 [ 0, %entry ], [ %sub, %for.body ]
  ret i32 %x.0.lcssa
}

; Conditional reductions with multi-input phis.
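;
; The scalar loop below roughly corresponds to the following C source (an
; illustrative sketch reconstructed from the IR; %C is unused, as in the IR,
; and everything not named in the IR is an assumption):
;
;   float reduction_conditional(float *A, float *B, float *C, float S) {
;     float sum = S;
;     for (int i = 0; i < 128; ++i) {
;       if (A[i] > B[i]) {          // %cmp3
;         if (B[i] > 1.0f)          // %cmp6
;           sum += A[i];            // fast-math fadd in %if.then8
;         else if (A[i] > 2.0f)     // %cmp14
;           sum += B[i];            // fast-math fadd in %if.then16
;       }
;     }
;     return sum;
;   }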
define float @reduction_conditional(float* %A, float* %B, float* %C, float %S) {
; CHECK-LABEL: @reduction_conditional(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP0:%.*]] = insertelement <4 x float> <float undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00>, float [[S:%.*]], i32 0
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[TMP0]], [[VECTOR_PH]] ], [ [[PREDPHI3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP2:%.*]] = bitcast float* [[TMP1]] to <4 x float>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP2]], align 4
; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds float, float* [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP4:%.*]] = bitcast float* [[TMP3]] to <4 x float>*
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x float>, <4 x float>* [[TMP4]], align 4
; CHECK-NEXT: [[TMP5:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[TMP6:%.*]] = fcmp ule <4 x float> [[WIDE_LOAD1]], <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>
; CHECK-NEXT: [[TMP7:%.*]] = fcmp ogt <4 x float> [[WIDE_LOAD]], <float 2.000000e+00, float 2.000000e+00, float 2.000000e+00, float 2.000000e+00>
; CHECK-NEXT: [[TMP8:%.*]] = and <4 x i1> [[TMP6]], [[TMP5]]
; CHECK-NEXT: [[TMP9:%.*]] = and <4 x i1> [[TMP7]], [[TMP8]]
; CHECK-NEXT: [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[TMP11:%.*]] = and <4 x i1> [[TMP8]], [[TMP10]]
; CHECK-NEXT: [[TMP12:%.*]] = xor <4 x i1> [[TMP5]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT: [[PREDPHI_V:%.*]] = select <4 x i1> [[TMP9]], <4 x float> [[WIDE_LOAD1]], <4 x float> [[WIDE_LOAD]]
; CHECK-NEXT: [[PREDPHI:%.*]] = fadd fast <4 x float> [[VEC_PHI]], [[PREDPHI_V]]
; CHECK-NEXT: [[TMP13:%.*]] = or <4 x i1> [[TMP11]], [[TMP12]]
; CHECK-NEXT: [[PREDPHI3]] = select <4 x i1> [[TMP13]], <4 x float> [[VEC_PHI]], <4 x float> [[PREDPHI]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP14:%.*]] = icmp eq i64 [[INDEX_NEXT]], 128
; CHECK-NEXT: br i1 [[TMP14]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !28
; CHECK: middle.block:
; CHECK-NEXT: [[TMP15:%.*]] = call fast float @llvm.experimental.vector.reduce.v2.fadd.f32.v4f32(float 0.000000e+00, <4 x float> [[PREDPHI3]])
; CHECK-NEXT: br i1 true, label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: br i1 undef, label [[IF_THEN:%.*]], label [[FOR_INC:%.*]]
; CHECK: if.then:
; CHECK-NEXT: br i1 undef, label [[IF_THEN8:%.*]], label [[IF_ELSE:%.*]]
; CHECK: if.then8:
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: if.else:
; CHECK-NEXT: br i1 undef, label [[IF_THEN16:%.*]], label [[FOR_INC]]
; CHECK: if.then16:
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: br i1 undef, label [[FOR_BODY]], label [[FOR_END]], !llvm.loop !29
; CHECK: for.end:
; CHECK-NEXT: [[SUM_1_LCSSA:%.*]] = phi float [ undef, [[FOR_INC]] ], [ [[TMP15]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret float [[SUM_1_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %indvars.iv = phi i64 [ 0, %entry ], [ %indvars.iv.next, %for.inc ]
  %sum.033 = phi float [ %S, %entry ], [ %sum.1, %for.inc ]
  %arrayidx = getelementptr inbounds float, float* %A, i64 %indvars.iv
  %0 = load float, float* %arrayidx, align 4
  %arrayidx2 = getelementptr inbounds float, float* %B, i64 %indvars.iv
  %1 = load float, float* %arrayidx2, align 4
  %cmp3 = fcmp ogt float %0, %1
  br i1 %cmp3, label %if.then, label %for.inc

if.then:
  %cmp6 = fcmp ogt float %1, 1.000000e+00
  br i1 %cmp6, label %if.then8, label %if.else

if.then8:
  %add = fadd fast float %sum.033, %0
  br label %for.inc

if.else:
  %cmp14 = fcmp ogt float %0, 2.000000e+00
  br i1 %cmp14, label %if.then16, label %for.inc

if.then16:
  %add19 = fadd fast float %sum.033, %1
  br label %for.inc

for.inc:
  %sum.1 = phi float [ %add, %if.then8 ], [ %add19, %if.then16 ], [ %sum.033, %if.else ], [ %sum.033, %for.body ]
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp ne i32 %lftr.wideiv, 128
  br i1 %exitcond, label %for.body, label %for.end

for.end:
  %sum.1.lcssa = phi float [ %sum.1, %for.inc ]
  ret float %sum.1.lcssa
}

define i32 @reduction_sum_multiuse(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) {
; CHECK-LABEL: @reduction_sum_multiuse(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[END:%.*]]
; CHECK: .lr.ph.preheader:
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT: [[TMP4:%.*]] = add nuw nsw i64 [[TMP3]], 1
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i32 [[TMP2]], 3
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[TMP4]], 8589934588
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP11:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_IND2:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT3:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP6:%.*]] = bitcast i32* [[TMP5]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP6]], align 4
; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to <4 x i32>*
; CHECK-NEXT: [[WIDE_LOAD1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP8]], align 4
; CHECK-NEXT: [[TMP9:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND2]]
; CHECK-NEXT: [[TMP10:%.*]] = add <4 x i32> [[TMP9]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP11]] = add <4 x i32> [[TMP10]], [[WIDE_LOAD1]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT3]] = add <4 x i32> [[VEC_IND2]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP12]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !30
; CHECK: middle.block:
; CHECK-NEXT: [[TMP13:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP11]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[TMP4]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[DOT_CRIT_EDGE:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP13]], [[MIDDLE_BLOCK]] ], [ 0, [[DOTLR_PH_PREHEADER]] ]
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[DOTLR_PH]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[SUM_02:%.*]] = phi i32 [ [[TMP21:%.*]], [[DOTLR_PH]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[INDVARS_IV]]
; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4
; CHECK-NEXT: [[TMP18:%.*]] = trunc i64 [[INDVARS_IV]] to i32
; CHECK-NEXT: [[TMP19:%.*]] = add i32 [[SUM_02]], [[TMP18]]
; CHECK-NEXT: [[TMP20:%.*]] = add i32 [[TMP19]], [[TMP15]]
; CHECK-NEXT: [[TMP21]] = add i32 [[TMP20]], [[TMP17]]
; CHECK-NEXT: [[INDVARS_IV_NEXT]] = add i64 [[INDVARS_IV]], 1
; CHECK-NEXT: [[LFTR_WIDEIV:%.*]] = trunc i64 [[INDVARS_IV_NEXT]] to i32
; CHECK-NEXT: [[EXITCOND:%.*]] = icmp eq i32 [[LFTR_WIDEIV]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND]], label [[DOT_CRIT_EDGE]], label [[DOTLR_PH]], !llvm.loop !31
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_LCSSA:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: [[SUM_COPY:%.*]] = phi i32 [ [[TMP21]], [[DOTLR_PH]] ], [ [[TMP13]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[END]]
; CHECK: end:
; CHECK-NEXT: [[F1:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[SUM_LCSSA]], [[DOT_CRIT_EDGE]] ]
; CHECK-NEXT: [[F2:%.*]] = phi i32 [ 0, [[TMP0]] ], [ [[SUM_COPY]], [[DOT_CRIT_EDGE]] ]
; CHECK-NEXT: [[FINAL:%.*]] = add i32 [[F1]], [[F2]]
; CHECK-NEXT: ret i32 [[FINAL]]
;
  %1 = icmp sgt i32 %n, 0
  br i1 %1, label %.lr.ph.preheader, label %end
.lr.ph.preheader: ; preds = %0
  br label %.lr.ph

.lr.ph: ; preds = %0, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %.lr.ph.preheader ]
  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %3 = load i32, i32* %2, align 4
  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %5 = load i32, i32* %4, align 4
  %6 = trunc i64 %indvars.iv to i32
  %7 = add i32 %sum.02, %6
  %8 = add i32 %7, %3
  %9 = add i32 %8, %5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %.lr.ph

._crit_edge: ; preds = %.lr.ph, %0
  %sum.lcssa = phi i32 [ %9, %.lr.ph ]
  %sum.copy = phi i32 [ %9, %.lr.ph ]
  br label %end

end:
  %f1 = phi i32 [ 0, %0 ], [ %sum.lcssa, %._crit_edge ]
  %f2 = phi i32 [ 0, %0 ], [ %sum.copy, %._crit_edge ]
  %final = add i32 %f1, %f2
  ret i32 %final
}

; Predicated loop, cannot (yet) use in-loop reductions.
define i32 @reduction_predicated(i32 %n, i32* noalias nocapture %A, i32* noalias nocapture %B) nounwind uwtable readonly noinline ssp {
; CHECK-LABEL: @reduction_predicated(
; CHECK-NEXT: [[TMP1:%.*]] = icmp sgt i32 [[N:%.*]], 0
; CHECK-NEXT: br i1 [[TMP1]], label [[DOTLR_PH_PREHEADER:%.*]], label [[DOT_CRIT_EDGE:%.*]]
; CHECK: .lr.ph.preheader:
; CHECK-NEXT: [[TMP2:%.*]] = add i32 [[N]], -1
; CHECK-NEXT: [[TMP3:%.*]] = zext i32 [[TMP2]] to i64
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[N_RND_UP:%.*]] = add nuw nsw i64 [[TMP3]], 4
; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N_RND_UP]], 8589934588
; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i64> undef, i64 [[TMP3]], i32 0
; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i64> [[BROADCAST_SPLATINSERT]], <4 x i64> undef, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ]
; CHECK-NEXT: [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT:%.*]], [[PRED_LOAD_CONTINUE14]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x i32> [ zeroinitializer, [[VECTOR_PH]] ], [ [[TMP50:%.*]], [[PRED_LOAD_CONTINUE14]] ]
; CHECK-NEXT: [[VEC_IND15:%.*]] = phi <4 x i32> [ <i32 0, i32 1, i32 2, i32 3>, [[VECTOR_PH]] ], [ [[VEC_IND_NEXT16:%.*]], [[PRED_LOAD_CONTINUE14]] ]
; CHECK-NEXT: [[TMP4:%.*]] = or i64 [[INDEX]], 1
; CHECK-NEXT: [[TMP5:%.*]] = or i64 [[INDEX]], 2
; CHECK-NEXT: [[TMP6:%.*]] = or i64 [[INDEX]], 3
; CHECK-NEXT: [[TMP7:%.*]] = icmp ule <4 x i64> [[VEC_IND]], [[BROADCAST_SPLAT]]
; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]]
; CHECK: pred.load.if:
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[TMP9]], align 4
; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x i32> undef, i32 [[TMP10]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]]
; CHECK: pred.load.continue:
; CHECK-NEXT: [[TMP12:%.*]] = phi <4 x i32> [ undef, [[VECTOR_BODY]] ], [ [[TMP11]], [[PRED_LOAD_IF]] ]
; CHECK-NEXT: [[TMP13:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
; CHECK-NEXT: br i1 [[TMP13]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]]
; CHECK: pred.load.if1:
; CHECK-NEXT: [[TMP14:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP14]], align 4
; CHECK-NEXT: [[TMP16:%.*]] = insertelement <4 x i32> [[TMP12]], i32 [[TMP15]], i32 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]]
; CHECK: pred.load.continue2:
; CHECK-NEXT: [[TMP17:%.*]] = phi <4 x i32> [ [[TMP12]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP16]], [[PRED_LOAD_IF1]] ]
; CHECK-NEXT: [[TMP18:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
; CHECK-NEXT: br i1 [[TMP18]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]]
; CHECK: pred.load.if3:
; CHECK-NEXT: [[TMP19:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4
; CHECK-NEXT: [[TMP21:%.*]] = insertelement <4 x i32> [[TMP17]], i32 [[TMP20]], i32 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]]
; CHECK: pred.load.continue4:
; CHECK-NEXT: [[TMP22:%.*]] = phi <4 x i32> [ [[TMP17]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP21]], [[PRED_LOAD_IF3]] ]
; CHECK-NEXT: [[TMP23:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
; CHECK-NEXT: br i1 [[TMP23]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]]
; CHECK: pred.load.if5:
; CHECK-NEXT: [[TMP24:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4
; CHECK-NEXT: [[TMP26:%.*]] = insertelement <4 x i32> [[TMP22]], i32 [[TMP25]], i32 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]]
; CHECK: pred.load.continue6:
; CHECK-NEXT: [[TMP27:%.*]] = phi <4 x i32> [ [[TMP22]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP26]], [[PRED_LOAD_IF5]] ]
; CHECK-NEXT: [[TMP28:%.*]] = extractelement <4 x i1> [[TMP7]], i32 0
; CHECK-NEXT: br i1 [[TMP28]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]]
; CHECK: pred.load.if7:
; CHECK-NEXT: [[TMP29:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]]
; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4
; CHECK-NEXT: [[TMP31:%.*]] = insertelement <4 x i32> undef, i32 [[TMP30]], i32 0
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]]
; CHECK: pred.load.continue8:
; CHECK-NEXT: [[TMP32:%.*]] = phi <4 x i32> [ undef, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP31]], [[PRED_LOAD_IF7]] ]
; CHECK-NEXT: [[TMP33:%.*]] = extractelement <4 x i1> [[TMP7]], i32 1
; CHECK-NEXT: br i1 [[TMP33]], label [[PRED_LOAD_IF9:%.*]], label [[PRED_LOAD_CONTINUE10:%.*]]
; CHECK: pred.load.if9:
; CHECK-NEXT: [[TMP34:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4
; CHECK-NEXT: [[TMP36:%.*]] = insertelement <4 x i32> [[TMP32]], i32 [[TMP35]], i32 1
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]]
; CHECK: pred.load.continue10:
; CHECK-NEXT: [[TMP37:%.*]] = phi <4 x i32> [ [[TMP32]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP36]], [[PRED_LOAD_IF9]] ]
; CHECK-NEXT: [[TMP38:%.*]] = extractelement <4 x i1> [[TMP7]], i32 2
; CHECK-NEXT: br i1 [[TMP38]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]]
; CHECK: pred.load.if11:
; CHECK-NEXT: [[TMP39:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP5]]
; CHECK-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4
; CHECK-NEXT: [[TMP41:%.*]] = insertelement <4 x i32> [[TMP37]], i32 [[TMP40]], i32 2
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]]
; CHECK: pred.load.continue12:
; CHECK-NEXT: [[TMP42:%.*]] = phi <4 x i32> [ [[TMP37]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP41]], [[PRED_LOAD_IF11]] ]
; CHECK-NEXT: [[TMP43:%.*]] = extractelement <4 x i1> [[TMP7]], i32 3
; CHECK-NEXT: br i1 [[TMP43]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]]
; CHECK: pred.load.if13:
; CHECK-NEXT: [[TMP44:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP6]]
; CHECK-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4
; CHECK-NEXT: [[TMP46:%.*]] = insertelement <4 x i32> [[TMP42]], i32 [[TMP45]], i32 3
; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]]
; CHECK: pred.load.continue14:
; CHECK-NEXT: [[TMP47:%.*]] = phi <4 x i32> [ [[TMP42]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP46]], [[PRED_LOAD_IF13]] ]
; CHECK-NEXT: [[TMP48:%.*]] = add <4 x i32> [[VEC_PHI]], [[VEC_IND15]]
; CHECK-NEXT: [[TMP49:%.*]] = add <4 x i32> [[TMP48]], [[TMP27]]
; CHECK-NEXT: [[TMP50]] = add <4 x i32> [[TMP49]], [[TMP47]]
; CHECK-NEXT: [[INDEX_NEXT]] = add i64 [[INDEX]], 4
; CHECK-NEXT: [[VEC_IND_NEXT]] = add <4 x i64> [[VEC_IND]], <i64 4, i64 4, i64 4, i64 4>
; CHECK-NEXT: [[VEC_IND_NEXT16]] = add <4 x i32> [[VEC_IND15]], <i32 4, i32 4, i32 4, i32 4>
; CHECK-NEXT: [[TMP51:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP51]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop !32
; CHECK: middle.block:
; CHECK-NEXT: [[TMP52:%.*]] = select <4 x i1> [[TMP7]], <4 x i32> [[TMP50]], <4 x i32> [[VEC_PHI]]
; CHECK-NEXT: [[TMP53:%.*]] = call i32 @llvm.experimental.vector.reduce.add.v4i32(<4 x i32> [[TMP52]])
; CHECK-NEXT: br i1 true, label [[DOT_CRIT_EDGE_LOOPEXIT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: br label [[DOTLR_PH:%.*]]
; CHECK: .lr.ph:
; CHECK-NEXT: br i1 undef, label [[DOT_CRIT_EDGE_LOOPEXIT]], label [[DOTLR_PH]], !llvm.loop !33
; CHECK: ._crit_edge.loopexit:
; CHECK-NEXT: [[DOTLCSSA:%.*]] = phi i32 [ undef, [[DOTLR_PH]] ], [ [[TMP53]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[DOT_CRIT_EDGE]]
; CHECK: ._crit_edge:
; CHECK-NEXT: [[SUM_0_LCSSA:%.*]] = phi i32 [ 0, [[TMP0:%.*]] ], [ [[DOTLCSSA]], [[DOT_CRIT_EDGE_LOOPEXIT]] ]
; CHECK-NEXT: ret i32 [[SUM_0_LCSSA]]
;
  %1 = icmp sgt i32 %n, 0
  br i1 %1, label %.lr.ph, label %._crit_edge

.lr.ph: ; preds = %0, %.lr.ph
  %indvars.iv = phi i64 [ %indvars.iv.next, %.lr.ph ], [ 0, %0 ]
  %sum.02 = phi i32 [ %9, %.lr.ph ], [ 0, %0 ]
  %2 = getelementptr inbounds i32, i32* %A, i64 %indvars.iv
  %3 = load i32, i32* %2, align 4
  %4 = getelementptr inbounds i32, i32* %B, i64 %indvars.iv
  %5 = load i32, i32* %4, align 4
  %6 = trunc i64 %indvars.iv to i32
  %7 = add i32 %sum.02, %6
  %8 = add i32 %7, %3
  %9 = add i32 %8, %5
  %indvars.iv.next = add i64 %indvars.iv, 1
  %lftr.wideiv = trunc i64 %indvars.iv.next to i32
  %exitcond = icmp eq i32 %lftr.wideiv, %n
  br i1 %exitcond, label %._crit_edge, label %.lr.ph, !llvm.loop !6

._crit_edge: ; preds = %.lr.ph, %0
  %sum.0.lcssa = phi i32 [ 0, %0 ], [ %9, %.lr.ph ]
  ret i32 %sum.0.lcssa
}

!6 = distinct !{!6, !7, !8}
!7 = !{!"llvm.loop.vectorize.predicate.enable", i1 true}
!8 = !{!"llvm.loop.vectorize.enable", i1 true}
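
; The !llvm.loop metadata above (!6 with hints !7 and !8) is attached to the
; back-edge of @reduction_predicated's scalar loop: llvm.loop.vectorize.enable
; marks the loop for vectorization, and llvm.loop.vectorize.predicate.enable
; requests predicated (tail-folded) vectorization, which is why that loop is
; lowered with the per-lane pred.load.if / pred.load.continue blocks checked
; above rather than an in-loop reduction.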