1; RUN: opt < %s -loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -dce -instcombine -S | FileCheck %s 2 3define float @cond_fadd(float* noalias nocapture readonly %a, float* noalias nocapture readonly %cond, i64 %N){ 4; CHECK-LABEL: @cond_fadd( 5; CHECK-NEXT: entry: 6; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 7; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 8; CHECK: vector.ph: 9; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 10; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 11; CHECK: vector.body: 12; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 13; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP27:%.*]], [[PRED_LOAD_CONTINUE6]] ] 14; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, float* [[COND:%.*]], i64 [[INDEX]] 15; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>* 16; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 17; CHECK-NEXT: [[TMP2:%.*]] = fcmp une <4 x float> [[WIDE_LOAD]], <float 5.000000e+00, float 5.000000e+00, float 5.000000e+00, float 5.000000e+00> 18; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0 19; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 20; CHECK: pred.load.if: 21; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 22; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[TMP4]], align 4 23; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 24; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 25; CHECK: pred.load.continue: 26; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] 27; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 28; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], 
label [[PRED_LOAD_CONTINUE2:%.*]] 29; CHECK: pred.load.if1: 30; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 31; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP9]] 32; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[TMP10]], align 4 33; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP11]], i64 1 34; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 35; CHECK: pred.load.continue2: 36; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] 37; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2 38; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 39; CHECK: pred.load.if3: 40; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 2 41; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP15]] 42; CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[TMP16]], align 4 43; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP17]], i64 2 44; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 45; CHECK: pred.load.continue4: 46; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x float> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] 47; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3 48; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 49; CHECK: pred.load.if5: 50; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 3 51; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP21]] 52; CHECK-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4 53; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP23]], i64 3 54; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 55; CHECK: pred.load.continue6: 56; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] 57; CHECK-NEXT: [[TMP26:%.*]] = select 
fast <4 x i1> [[TMP2]], <4 x float> [[TMP25]], <4 x float> zeroinitializer 58; CHECK-NEXT: [[TMP27]] = call fast float @llvm.vector.reduce.fadd.v4f32(float [[VEC_PHI]], <4 x float> [[TMP26]]) 59; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 60; CHECK-NEXT: [[TMP28:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 61; CHECK-NEXT: br i1 [[TMP28]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 62; CHECK: middle.block: 63; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] 64; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 65; CHECK: scalar.ph: 66; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 67; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP27]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ] 68; CHECK-NEXT: br label [[FOR_BODY:%.*]] 69; CHECK: for.body: 70; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 71; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ] 72; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[COND]], i64 [[IV]] 73; CHECK-NEXT: [[TMP29:%.*]] = load float, float* [[ARRAYIDX]], align 4 74; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP29]], 5.000000e+00 75; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] 76; CHECK: if.then: 77; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]] 78; CHECK-NEXT: [[TMP30:%.*]] = load float, float* [[ARRAYIDX2]], align 4 79; CHECK-NEXT: [[FADD:%.*]] = fadd fast float [[RDX]], [[TMP30]] 80; CHECK-NEXT: br label [[FOR_INC]] 81; CHECK: for.inc: 82; CHECK-NEXT: [[RES]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[FADD]], [[IF_THEN]] ] 83; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 84; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 85; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], 
label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]] 86; CHECK: for.end: 87; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[TMP27]], [[MIDDLE_BLOCK]] ] 88; CHECK-NEXT: ret float [[RES_LCSSA]] 89; 90entry: 91 br label %for.body 92 93for.body: 94 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] 95 %rdx = phi float [ 1.000000e+00, %entry ], [ %res, %for.inc ] 96 %arrayidx = getelementptr inbounds float, float* %cond, i64 %iv 97 %0 = load float, float* %arrayidx 98 %tobool = fcmp une float %0, 5.000000e+00 99 br i1 %tobool, label %if.then, label %for.inc 100 101if.then: 102 %arrayidx2 = getelementptr inbounds float, float* %a, i64 %iv 103 %1 = load float, float* %arrayidx2 104 %fadd = fadd fast float %rdx, %1 105 br label %for.inc 106 107for.inc: 108 %res = phi float [ %rdx, %for.body ], [ %fadd, %if.then ] 109 %iv.next = add i64 %iv, 1 110 %exitcond.not = icmp eq i64 %iv.next, %N 111 br i1 %exitcond.not, label %for.end, label %for.body 112 113for.end: 114 ret float %res 115} 116 117define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) { 118; CHECK-LABEL: @cond_cmp_sel( 119; CHECK-NEXT: entry: 120; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 121; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 122; CHECK: vector.ph: 123; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 124; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 125; CHECK: vector.body: 126; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 127; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_LOAD_CONTINUE6]] ] 128; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds float, float* [[COND:%.*]], i64 [[INDEX]] 129; CHECK-NEXT: [[TMP1:%.*]] = bitcast float* [[TMP0]] to <4 x float>* 130; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP1]], align 4 131; CHECK-NEXT: [[TMP2:%.*]] = fcmp une <4 x 
float> [[WIDE_LOAD]], <float 3.000000e+00, float 3.000000e+00, float 3.000000e+00, float 3.000000e+00> 132; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0 133; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 134; CHECK: pred.load.if: 135; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds float, float* [[A:%.*]], i64 [[INDEX]] 136; CHECK-NEXT: [[TMP5:%.*]] = load float, float* [[TMP4]], align 4 137; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x float> poison, float [[TMP5]], i64 0 138; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 139; CHECK: pred.load.continue: 140; CHECK-NEXT: [[TMP7:%.*]] = phi <4 x float> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] 141; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 142; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 143; CHECK: pred.load.if1: 144; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 145; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP9]] 146; CHECK-NEXT: [[TMP11:%.*]] = load float, float* [[TMP10]], align 4 147; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x float> [[TMP7]], float [[TMP11]], i64 1 148; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 149; CHECK: pred.load.continue2: 150; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x float> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] 151; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2 152; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 153; CHECK: pred.load.if3: 154; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 2 155; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP15]] 156; CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[TMP16]], align 4 157; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x float> [[TMP13]], float [[TMP17]], i64 2 158; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 159; 
CHECK: pred.load.continue4: 160; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x float> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] 161; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3 162; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 163; CHECK: pred.load.if5: 164; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 3 165; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[TMP21]] 166; CHECK-NEXT: [[TMP23:%.*]] = load float, float* [[TMP22]], align 4 167; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x float> [[TMP19]], float [[TMP23]], i64 3 168; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 169; CHECK: pred.load.continue6: 170; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x float> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] 171; CHECK-NEXT: [[TMP26:%.*]] = select fast <4 x i1> [[TMP2]], <4 x float> [[TMP25]], <4 x float> <float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000, float 0xFFF0000000000000> 172; CHECK-NEXT: [[TMP27:%.*]] = call fast float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP26]]) 173; CHECK-NEXT: [[TMP28]] = call fast float @llvm.minnum.f32(float [[TMP27]], float [[VEC_PHI]]) 174; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 175; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 176; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]] 177; CHECK: middle.block: 178; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] 179; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 180; CHECK: scalar.ph: 181; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 182; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[TMP28]], [[MIDDLE_BLOCK]] ], [ 1.000000e+00, [[ENTRY]] ] 183; CHECK-NEXT: br label [[FOR_BODY:%.*]] 184; CHECK: for.body: 185; CHECK-NEXT: [[IV:%.*]] 
= phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 186; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ] 187; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[COND]], i64 [[IV]] 188; CHECK-NEXT: [[TMP30:%.*]] = load float, float* [[ARRAYIDX]], align 4 189; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP30]], 3.000000e+00 190; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] 191; CHECK: if.then: 192; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]] 193; CHECK-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX2]], align 4 194; CHECK-NEXT: [[TMP32:%.*]] = call fast float @llvm.minnum.f32(float [[RDX]], float [[TMP31]]) 195; CHECK-NEXT: br label [[FOR_INC]] 196; CHECK: for.inc: 197; CHECK-NEXT: [[RES]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[TMP32]], [[IF_THEN]] ] 198; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1 199; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 200; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]] 201; CHECK: for.end: 202; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ] 203; CHECK-NEXT: ret float [[RES_LCSSA]] 204; 205entry: 206 br label %for.body 207 208for.body: 209 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] 210 %rdx = phi float [ 1.000000e+00, %entry ], [ %res, %for.inc ] 211 %arrayidx = getelementptr inbounds float, float* %cond, i64 %iv 212 %0 = load float, float* %arrayidx 213 %tobool = fcmp une float %0, 3.000000e+00 214 br i1 %tobool, label %if.then, label %for.inc 215 216if.then: 217 %arrayidx2 = getelementptr inbounds float, float* %a, i64 %iv 218 %1 = load float, float* %arrayidx2 219 %fcmp = fcmp fast olt float %rdx, %1 220 %fsel = select fast i1 %fcmp, float %rdx, float %1 221 br label %for.inc 222 223for.inc: 224 %res = phi float [ 
%rdx, %for.body ], [ %fsel, %if.then ] 225 %iv.next = add i64 %iv, 1 226 %exitcond.not = icmp eq i64 %iv.next, %N 227 br i1 %exitcond.not, label %for.end, label %for.body 228 229for.end: 230 ret float %res 231} 232 233define i32 @conditional_and(i32* noalias %A, i32* noalias %B, i32 %cond, i64 noundef %N) #0 { 234; CHECK-LABEL: @conditional_and( 235; CHECK-NEXT: entry: 236; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 237; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 238; CHECK: vector.ph: 239; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 240; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement <4 x i32> poison, i32 [[COND:%.*]], i64 0 241; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector <4 x i32> [[BROADCAST_SPLATINSERT]], <4 x i32> poison, <4 x i32> zeroinitializer 242; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 243; CHECK: vector.body: 244; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE6:%.*]] ] 245; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 7, [[VECTOR_PH]] ], [ [[TMP28:%.*]], [[PRED_LOAD_CONTINUE6]] ] 246; CHECK-NEXT: [[TMP0:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 247; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32* [[TMP0]] to <4 x i32>* 248; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP1]], align 4 249; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], [[BROADCAST_SPLAT]] 250; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x i1> [[TMP2]], i64 0 251; CHECK-NEXT: br i1 [[TMP3]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 252; CHECK: pred.load.if: 253; CHECK-NEXT: [[TMP4:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 254; CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 255; CHECK-NEXT: [[TMP6:%.*]] = insertelement <4 x i32> poison, i32 [[TMP5]], i64 0 256; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 257; CHECK: pred.load.continue: 258; CHECK-NEXT: 
[[TMP7:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP6]], [[PRED_LOAD_IF]] ] 259; CHECK-NEXT: [[TMP8:%.*]] = extractelement <4 x i1> [[TMP2]], i64 1 260; CHECK-NEXT: br i1 [[TMP8]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 261; CHECK: pred.load.if1: 262; CHECK-NEXT: [[TMP9:%.*]] = or i64 [[INDEX]], 1 263; CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP9]] 264; CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 265; CHECK-NEXT: [[TMP12:%.*]] = insertelement <4 x i32> [[TMP7]], i32 [[TMP11]], i64 1 266; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 267; CHECK: pred.load.continue2: 268; CHECK-NEXT: [[TMP13:%.*]] = phi <4 x i32> [ [[TMP7]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP12]], [[PRED_LOAD_IF1]] ] 269; CHECK-NEXT: [[TMP14:%.*]] = extractelement <4 x i1> [[TMP2]], i64 2 270; CHECK-NEXT: br i1 [[TMP14]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 271; CHECK: pred.load.if3: 272; CHECK-NEXT: [[TMP15:%.*]] = or i64 [[INDEX]], 2 273; CHECK-NEXT: [[TMP16:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP15]] 274; CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 4 275; CHECK-NEXT: [[TMP18:%.*]] = insertelement <4 x i32> [[TMP13]], i32 [[TMP17]], i64 2 276; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 277; CHECK: pred.load.continue4: 278; CHECK-NEXT: [[TMP19:%.*]] = phi <4 x i32> [ [[TMP13]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP18]], [[PRED_LOAD_IF3]] ] 279; CHECK-NEXT: [[TMP20:%.*]] = extractelement <4 x i1> [[TMP2]], i64 3 280; CHECK-NEXT: br i1 [[TMP20]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6]] 281; CHECK: pred.load.if5: 282; CHECK-NEXT: [[TMP21:%.*]] = or i64 [[INDEX]], 3 283; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP21]] 284; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 285; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP19]], i32 [[TMP23]], i64 3 286; CHECK-NEXT: br label 
[[PRED_LOAD_CONTINUE6]] 287; CHECK: pred.load.continue6: 288; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP19]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] 289; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> [[TMP2]], <4 x i32> [[TMP25]], <4 x i32> <i32 -1, i32 -1, i32 -1, i32 -1> 290; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.and.v4i32(<4 x i32> [[TMP26]]) 291; CHECK-NEXT: [[TMP28]] = and i32 [[TMP27]], [[VEC_PHI]] 292; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 293; CHECK-NEXT: [[TMP29:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 294; CHECK-NEXT: br i1 [[TMP29]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP6:![0-9]+]] 295; CHECK: middle.block: 296; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] 297; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 298; CHECK: scalar.ph: 299; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 300; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP28]], [[MIDDLE_BLOCK]] ], [ 7, [[ENTRY]] ] 301; CHECK-NEXT: br label [[FOR_BODY:%.*]] 302; CHECK: for.body: 303; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ] 304; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ] 305; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]] 306; CHECK-NEXT: [[TMP30:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 307; CHECK-NEXT: [[TOBOOL:%.*]] = icmp eq i32 [[TMP30]], [[COND]] 308; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]] 309; CHECK: if.then: 310; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[IV]] 311; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 312; CHECK-NEXT: [[AND:%.*]] = and i32 [[TMP31]], [[RDX]] 313; CHECK-NEXT: br label [[FOR_INC]] 314; CHECK: for.inc: 315; CHECK-NEXT: [[RES]] = phi i32 
[ [[AND]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] 316; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 317; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]] 318; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP7:![0-9]+]] 319; CHECK: for.end: 320; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_INC]] ], [ [[TMP28]], [[MIDDLE_BLOCK]] ] 321; CHECK-NEXT: ret i32 [[RES_LCSSA]] 322; 323entry: 324 br label %for.body 325 326for.body: 327 %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ] 328 %rdx = phi i32 [ 7, %entry ], [ %res, %for.inc ] 329 %arrayidx = getelementptr inbounds i32, i32* %A, i64 %iv 330 %0 = load i32, i32* %arrayidx 331 %tobool = icmp eq i32 %0, %cond 332 br i1 %tobool, label %if.then, label %for.inc 333 334if.then: 335 %arrayidx2 = getelementptr inbounds i32, i32* %B, i64 %iv 336 %1 = load i32, i32* %arrayidx2 337 %and = and i32 %1, %rdx 338 br label %for.inc 339 340for.inc: 341 %res = phi i32 [ %and, %if.then ], [ %rdx, %for.body ] 342 %iv.next = add nuw nsw i64 %iv, 1 343 %exitcond.not = icmp eq i64 %iv.next, %N 344 br i1 %exitcond.not, label %for.end, label %for.body 345 346for.end: 347 ret i32 %res 348} 349 350define i32 @simple_chained_rdx(i32* noalias %a, i32* noalias %b, i32* noalias %cond, i64 noundef %N) { 351; CHECK-LABEL: @simple_chained_rdx( 352; CHECK-NEXT: entry: 353; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4 354; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]] 355; CHECK: vector.ph: 356; CHECK-NEXT: [[N_VEC:%.*]] = and i64 [[N]], -4 357; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] 358; CHECK: vector.body: 359; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[PRED_LOAD_CONTINUE14:%.*]] ] 360; CHECK-NEXT: [[VEC_PHI:%.*]] = phi i32 [ 5, [[VECTOR_PH]] ], [ [[TMP51:%.*]], [[PRED_LOAD_CONTINUE14]] ] 361; CHECK-NEXT: [[TMP0:%.*]] = or i64 [[INDEX]], 1 362; CHECK-NEXT: [[TMP1:%.*]] 
= or i64 [[INDEX]], 2 363; CHECK-NEXT: [[TMP2:%.*]] = or i64 [[INDEX]], 3 364; CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i32, i32* [[COND:%.*]], i64 [[INDEX]] 365; CHECK-NEXT: [[TMP4:%.*]] = bitcast i32* [[TMP3]] to <4 x i32>* 366; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, <4 x i32>* [[TMP4]], align 4 367; CHECK-NEXT: [[TMP5:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], zeroinitializer 368; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0 369; CHECK-NEXT: br i1 [[TMP6]], label [[PRED_LOAD_IF:%.*]], label [[PRED_LOAD_CONTINUE:%.*]] 370; CHECK: pred.load.if: 371; CHECK-NEXT: [[TMP7:%.*]] = getelementptr inbounds i32, i32* [[A:%.*]], i64 [[INDEX]] 372; CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 373; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x i32> poison, i32 [[TMP8]], i64 0 374; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE]] 375; CHECK: pred.load.continue: 376; CHECK-NEXT: [[TMP10:%.*]] = phi <4 x i32> [ poison, [[VECTOR_BODY]] ], [ [[TMP9]], [[PRED_LOAD_IF]] ] 377; CHECK-NEXT: [[TMP11:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1 378; CHECK-NEXT: br i1 [[TMP11]], label [[PRED_LOAD_IF1:%.*]], label [[PRED_LOAD_CONTINUE2:%.*]] 379; CHECK: pred.load.if1: 380; CHECK-NEXT: [[TMP12:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP0]] 381; CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 382; CHECK-NEXT: [[TMP14:%.*]] = insertelement <4 x i32> [[TMP10]], i32 [[TMP13]], i64 1 383; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE2]] 384; CHECK: pred.load.continue2: 385; CHECK-NEXT: [[TMP15:%.*]] = phi <4 x i32> [ [[TMP10]], [[PRED_LOAD_CONTINUE]] ], [ [[TMP14]], [[PRED_LOAD_IF1]] ] 386; CHECK-NEXT: [[TMP16:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2 387; CHECK-NEXT: br i1 [[TMP16]], label [[PRED_LOAD_IF3:%.*]], label [[PRED_LOAD_CONTINUE4:%.*]] 388; CHECK: pred.load.if3: 389; CHECK-NEXT: [[TMP17:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP1]] 390; CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], 
align 4 391; CHECK-NEXT: [[TMP19:%.*]] = insertelement <4 x i32> [[TMP15]], i32 [[TMP18]], i64 2 392; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE4]] 393; CHECK: pred.load.continue4: 394; CHECK-NEXT: [[TMP20:%.*]] = phi <4 x i32> [ [[TMP15]], [[PRED_LOAD_CONTINUE2]] ], [ [[TMP19]], [[PRED_LOAD_IF3]] ] 395; CHECK-NEXT: [[TMP21:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3 396; CHECK-NEXT: br i1 [[TMP21]], label [[PRED_LOAD_IF5:%.*]], label [[PRED_LOAD_CONTINUE6:%.*]] 397; CHECK: pred.load.if5: 398; CHECK-NEXT: [[TMP22:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[TMP2]] 399; CHECK-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 400; CHECK-NEXT: [[TMP24:%.*]] = insertelement <4 x i32> [[TMP20]], i32 [[TMP23]], i64 3 401; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE6]] 402; CHECK: pred.load.continue6: 403; CHECK-NEXT: [[TMP25:%.*]] = phi <4 x i32> [ [[TMP20]], [[PRED_LOAD_CONTINUE4]] ], [ [[TMP24]], [[PRED_LOAD_IF5]] ] 404; CHECK-NEXT: [[TMP26:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP25]], <4 x i32> zeroinitializer 405; CHECK-NEXT: [[TMP27:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP26]]) 406; CHECK-NEXT: [[TMP28:%.*]] = add i32 [[TMP27]], [[VEC_PHI]] 407; CHECK-NEXT: [[TMP29:%.*]] = extractelement <4 x i1> [[TMP5]], i64 0 408; CHECK-NEXT: br i1 [[TMP29]], label [[PRED_LOAD_IF7:%.*]], label [[PRED_LOAD_CONTINUE8:%.*]] 409; CHECK: pred.load.if7: 410; CHECK-NEXT: [[TMP30:%.*]] = getelementptr inbounds i32, i32* [[B:%.*]], i64 [[INDEX]] 411; CHECK-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 412; CHECK-NEXT: [[TMP32:%.*]] = insertelement <4 x i32> poison, i32 [[TMP31]], i64 0 413; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE8]] 414; CHECK: pred.load.continue8: 415; CHECK-NEXT: [[TMP33:%.*]] = phi <4 x i32> [ poison, [[PRED_LOAD_CONTINUE6]] ], [ [[TMP32]], [[PRED_LOAD_IF7]] ] 416; CHECK-NEXT: [[TMP34:%.*]] = extractelement <4 x i1> [[TMP5]], i64 1 417; CHECK-NEXT: br i1 [[TMP34]], label [[PRED_LOAD_IF9:%.*]], label 
[[PRED_LOAD_CONTINUE10:%.*]] 418; CHECK: pred.load.if9: 419; CHECK-NEXT: [[TMP35:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP0]] 420; CHECK-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 421; CHECK-NEXT: [[TMP37:%.*]] = insertelement <4 x i32> [[TMP33]], i32 [[TMP36]], i64 1 422; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE10]] 423; CHECK: pred.load.continue10: 424; CHECK-NEXT: [[TMP38:%.*]] = phi <4 x i32> [ [[TMP33]], [[PRED_LOAD_CONTINUE8]] ], [ [[TMP37]], [[PRED_LOAD_IF9]] ] 425; CHECK-NEXT: [[TMP39:%.*]] = extractelement <4 x i1> [[TMP5]], i64 2 426; CHECK-NEXT: br i1 [[TMP39]], label [[PRED_LOAD_IF11:%.*]], label [[PRED_LOAD_CONTINUE12:%.*]] 427; CHECK: pred.load.if11: 428; CHECK-NEXT: [[TMP40:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP1]] 429; CHECK-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 430; CHECK-NEXT: [[TMP42:%.*]] = insertelement <4 x i32> [[TMP38]], i32 [[TMP41]], i64 2 431; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE12]] 432; CHECK: pred.load.continue12: 433; CHECK-NEXT: [[TMP43:%.*]] = phi <4 x i32> [ [[TMP38]], [[PRED_LOAD_CONTINUE10]] ], [ [[TMP42]], [[PRED_LOAD_IF11]] ] 434; CHECK-NEXT: [[TMP44:%.*]] = extractelement <4 x i1> [[TMP5]], i64 3 435; CHECK-NEXT: br i1 [[TMP44]], label [[PRED_LOAD_IF13:%.*]], label [[PRED_LOAD_CONTINUE14]] 436; CHECK: pred.load.if13: 437; CHECK-NEXT: [[TMP45:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[TMP2]] 438; CHECK-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 439; CHECK-NEXT: [[TMP47:%.*]] = insertelement <4 x i32> [[TMP43]], i32 [[TMP46]], i64 3 440; CHECK-NEXT: br label [[PRED_LOAD_CONTINUE14]] 441; CHECK: pred.load.continue14: 442; CHECK-NEXT: [[TMP48:%.*]] = phi <4 x i32> [ [[TMP43]], [[PRED_LOAD_CONTINUE12]] ], [ [[TMP47]], [[PRED_LOAD_IF13]] ] 443; CHECK-NEXT: [[TMP49:%.*]] = select <4 x i1> [[TMP5]], <4 x i32> [[TMP48]], <4 x i32> zeroinitializer 444; CHECK-NEXT: [[TMP50:%.*]] = call i32 @llvm.vector.reduce.add.v4i32(<4 x i32> [[TMP49]]) 
445; CHECK-NEXT: [[TMP51]] = add i32 [[TMP50]], [[TMP28]] 446; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4 447; CHECK-NEXT: [[TMP52:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] 448; CHECK-NEXT: br i1 [[TMP52]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] 449; CHECK: middle.block: 450; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N_VEC]], [[N]] 451; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]] 452; CHECK: scalar.ph: 453; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ] 454; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi i32 [ [[TMP51]], [[MIDDLE_BLOCK]] ], [ 5, [[ENTRY]] ] 455; CHECK-NEXT: br label [[FOR_BODY:%.*]] 456; CHECK: for.body: 457; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ] 458; CHECK-NEXT: [[RDX:%.*]] = phi i32 [ [[RES:%.*]], [[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ] 459; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[COND]], i64 [[IV]] 460; CHECK-NEXT: [[TMP53:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 461; CHECK-NEXT: [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP53]], 0 462; CHECK-NEXT: br i1 [[TOBOOL_NOT]], label [[FOR_INC]], label [[IF_THEN:%.*]] 463; CHECK: if.then: 464; CHECK-NEXT: [[ARRAYIDX1:%.*]] = getelementptr inbounds i32, i32* [[A]], i64 [[IV]] 465; CHECK-NEXT: [[TMP54:%.*]] = load i32, i32* [[ARRAYIDX1]], align 4 466; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP54]], [[RDX]] 467; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds i32, i32* [[B]], i64 [[IV]] 468; CHECK-NEXT: [[TMP55:%.*]] = load i32, i32* [[ARRAYIDX2]], align 4 469; CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[ADD]], [[TMP55]] 470; CHECK-NEXT: br label [[FOR_INC]] 471; CHECK: for.inc: 472; CHECK-NEXT: [[RES]] = phi i32 [ [[ADD3]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ] 473; CHECK-NEXT: [[IV_NEXT]] = add nuw nsw i64 [[IV]], 1 474; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 
[[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi i32 [ [[RES]], [[FOR_INC]] ], [ [[TMP51]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret i32 [[RES_LCSSA]]
;
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ %iv.next, %for.inc ], [ 0, %entry ]
  %rdx = phi i32 [ %res, %for.inc ], [ 5, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %cond, i64 %iv
  %0 = load i32, i32* %arrayidx
  %tobool.not = icmp eq i32 %0, 0
  br i1 %tobool.not, label %for.inc, label %if.then

if.then:
  %arrayidx1 = getelementptr inbounds i32, i32* %a, i64 %iv
  %1 = load i32, i32* %arrayidx1
  %add = add nsw i32 %1, %rdx
  %arrayidx2 = getelementptr inbounds i32, i32* %b, i64 %iv
  %2 = load i32, i32* %arrayidx2
  %add3 = add nsw i32 %add, %2
  br label %for.inc

for.inc:
  %res = phi i32 [ %add3, %if.then ], [ %rdx, %for.body ]
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i32 %res
}

;
; Negative Tests
;

;
; Reduction not performed in loop as the phi has more than two incoming values
;
; Scalar loop: %rdx starts at 5. When cond[iv] == 0 the loop ANDs %rdx with
; a[iv] (%and1); if in addition a[iv] == 3 it instead ANDs %rdx with b[iv]
; (%and2). %res merges %and2, %and1 and the unchanged %rdx (three incoming
; values).
; The FileCheck lines below require that @llvm.vector.reduce.and does not
; appear between vector.body and middle.block, and does appear after
; middle.block.
;
define i64 @nested_cond_and(i64* noalias nocapture readonly %a, i64* noalias nocapture readonly %b, i64* noalias nocapture readonly %cond, i64 %N){
; CHECK-LABEL: @nested_cond_and(
; CHECK: vector.body:
; CHECK-NOT: @llvm.vector.reduce.and
; CHECK: middle.block:
; CHECK: @llvm.vector.reduce.and
; CHECK: scalar.ph
entry:
  br label %for.body

for.body:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
  %rdx = phi i64 [ 5, %entry ], [ %res, %for.inc ]
  %arrayidx = getelementptr inbounds i64, i64* %cond, i64 %iv
  %0 = load i64, i64* %arrayidx
  %tobool = icmp eq i64 %0, 0
  br i1 %tobool, label %if.then, label %for.inc

if.then:
  %arrayidx2 = getelementptr inbounds i64, i64* %a, i64 %iv
  %1 = load i64, i64* %arrayidx2
  %and1 = and i64 %rdx, %1
  %tobool2 = icmp eq i64 %1, 3
  br i1 %tobool2, label %if.then.2, label %for.inc

if.then.2:
  %arrayidx3 = getelementptr inbounds i64, i64* %b, i64 %iv
  %2 = load i64, i64* %arrayidx3
  %and2 = and i64 %rdx, %2
  br label %for.inc

for.inc:
  %res = phi i64 [ %and2, %if.then.2 ], [ %and1, %if.then ], [ %rdx, %for.body ]
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i64 %res
}

; Chain of conditional & unconditional reductions. We currently only support conditional reductions
; if they are the last in the chain, i.e. the loop exit instruction is a Phi node. Therefore we reject
; the Phi (%rdx1) as it has more than one use.
;
; Scalar loop: %rdx1 starts at 0. src2[iv] is added only when cond[iv] != 0
; (%add); src1[iv] is then added on every iteration (%add2). %add2 is both the
; value fed back into %rdx1 and the value returned. %rdx1 is used by %add and
; by the %res phi.
; The FileCheck lines below require that @llvm.vector.reduce.add does not
; appear between pred.load.continue6 and middle.block, and does appear after
; middle.block.
;
define i32 @cond-uncond(i32* noalias %src1, i32* noalias %src2, i32* noalias %cond, i64 noundef %n) #0 {
; CHECK-LABEL: @cond-uncond(
; CHECK: pred.load.continue6:
; CHECK-NOT: @llvm.vector.reduce.add
; CHECK: middle.block:
; CHECK: @llvm.vector.reduce.add
; CHECK: scalar.ph
entry:
  br label %for.body

for.body:
  %rdx1 = phi i32 [ %add2, %if.end ], [ 0, %entry ]
  %iv = phi i64 [ %iv.next, %if.end ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %cond, i64 %iv
  %0 = load i32, i32* %arrayidx
  %tobool.not = icmp eq i32 %0, 0
  br i1 %tobool.not, label %if.end, label %if.then

if.then:
  %arrayidx1 = getelementptr inbounds i32, i32* %src2, i64 %iv
  %1 = load i32, i32* %arrayidx1
  %add = add nsw i32 %1, %rdx1
  br label %if.end

if.end:
  %res = phi i32 [ %add, %if.then ], [ %rdx1, %for.body ]
  %arrayidx2 = getelementptr inbounds i32, i32* %src1, i64 %iv
  %2 = load i32, i32* %arrayidx2
  %add2 = add nsw i32 %2, %res
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i32 %add2
}

;
; Chain of two conditional reductions. We do not vectorise this with in-loop reductions as neither
; of the incoming values of the LoopExitInstruction (%res) is the reduction Phi (%rdx1).
;
; Scalar loop: %rdx1 starts at 2.0. src1[iv] is added when cond[iv] == 3.0
; (%add) and src2[iv] is added when cond[iv] == 7.0 (%add2); both compares test
; the same loaded value %0. %res merges %add2 and %rdx2.
; The FileCheck lines below require that @llvm.vector.reduce.fadd does not
; appear between pred.load.continue14 and middle.block, and does appear after
; middle.block.
;
define float @cond_cond(float* noalias %src1, float* noalias %src2, float* noalias %cond, i64 %n) #0 {
; CHECK-LABEL: @cond_cond(
; CHECK: pred.load.continue14:
; CHECK-NOT: @llvm.vector.reduce.fadd
; CHECK: middle.block:
; CHECK: @llvm.vector.reduce.fadd
; CHECK: scalar.ph
entry:
  br label %for.body

for.body:
  %rdx1 = phi float [ %res, %for.inc ], [ 2.000000e+00, %entry ]
  %iv = phi i64 [ %iv.next, %for.inc ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds float, float* %cond, i64 %iv
  %0 = load float, float* %arrayidx
  %cmp1 = fcmp fast oeq float %0, 3.000000e+00
  br i1 %cmp1, label %if.then, label %if.end

if.then:
  %arrayidx2 = getelementptr inbounds float, float* %src1, i64 %iv
  %1 = load float, float* %arrayidx2
  %add = fadd fast float %1, %rdx1
  br label %if.end

if.end:
  %rdx2 = phi float [ %add, %if.then ], [ %rdx1, %for.body ]
  %cmp5 = fcmp fast oeq float %0, 7.000000e+00
  br i1 %cmp5, label %if.then6, label %for.inc

if.then6:
  %arrayidx7 = getelementptr inbounds float, float* %src2, i64 %iv
  %2 = load float, float* %arrayidx7
  %add2 = fadd fast float %2, %rdx2
  br label %for.inc

for.inc:
  %res = phi float [ %add2, %if.then6 ], [ %rdx2, %if.end ]
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %n
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret float %res
}

;
; Chain of an unconditional & a conditional reduction.
; We do not vectorise this in-loop as neither of the
; incoming values of the LoopExitInstruction (%res) is the reduction Phi (%rdx).
;
; Scalar loop: %rdx starts at 0. src1[iv] is added on every iteration (%add1);
; src2[iv] is added on top of that when cond[iv] != 0 (%add2). %res merges
; %add2 and %add1, so %rdx itself is not one of its incoming values.
; The FileCheck lines below require that @llvm.vector.reduce.add does not
; appear between pred.load.continue7 and middle.block, and does appear after
; middle.block.
;
define i32 @uncond_cond(i32* noalias %src1, i32* noalias %src2, i32* noalias %cond, i64 %N) #0 {
; CHECK-LABEL: @uncond_cond(
; CHECK: pred.load.continue7:
; CHECK-NOT: @llvm.vector.reduce.add
; CHECK: middle.block:
; CHECK: @llvm.vector.reduce.add
; CHECK: scalar.ph
entry:
  br label %for.body

for.body:
  %rdx = phi i32 [ %res, %for.inc ], [ 0, %entry ]
  %iv = phi i64 [ %iv.next, %for.inc ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %src1, i64 %iv
  %0 = load i32, i32* %arrayidx
  %add1 = add nsw i32 %0, %rdx
  %arrayidx1 = getelementptr inbounds i32, i32* %cond, i64 %iv
  %1 = load i32, i32* %arrayidx1
  %tobool.not = icmp eq i32 %1, 0
  br i1 %tobool.not, label %for.inc, label %if.then

if.then:
  %arrayidx2 = getelementptr inbounds i32, i32* %src2, i64 %iv
  %2 = load i32, i32* %arrayidx2
  %add2 = add nsw i32 %2, %add1
  br label %for.inc

for.inc:
  %res = phi i32 [ %add2, %if.then ], [ %add1, %for.body ]
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i32 %res
}

;
; Chain of multiple unconditional & conditional reductions. Does not vectorise in-loop as when we look back
; through the chain and check the number of uses of %add1, we find more than the expected one use.
;
; Scalar loop: %rdx starts at 0. src1[iv] is added on every iteration (%add1);
; src2[iv] is added when cond[iv] != 0 (%add2); the same src1[iv] value (%0) is
; then added a second time (%add3), and %add3 is both fed back into %rdx and
; returned. %add1 is used by %add2 and by the %res phi.
; The FileCheck lines below require that @llvm.vector.reduce.add does not
; appear between pred.load.continue7 and middle.block, and does appear after
; middle.block.
;
define i32 @uncond_cond_uncond(i32* noalias %src1, i32* noalias %src2, i32* noalias %cond, i64 noundef %N) {
; CHECK-LABEL: @uncond_cond_uncond(
; CHECK: pred.load.continue7:
; CHECK-NOT: @llvm.vector.reduce.add
; CHECK: middle.block:
; CHECK: @llvm.vector.reduce.add
; CHECK: scalar.ph
entry:
  br label %for.body

for.body:
  %rdx = phi i32 [ %add3, %if.end ], [ 0, %entry ]
  %iv = phi i64 [ %iv.next, %if.end ], [ 0, %entry ]
  %arrayidx = getelementptr inbounds i32, i32* %src1, i64 %iv
  %0 = load i32, i32* %arrayidx
  %add1 = add nsw i32 %0, %rdx
  %arrayidx1 = getelementptr inbounds i32, i32* %cond, i64 %iv
  %1 = load i32, i32* %arrayidx1
  %tobool.not = icmp eq i32 %1, 0
  br i1 %tobool.not, label %if.end, label %if.then

if.then:
  %arrayidx2 = getelementptr inbounds i32, i32* %src2, i64 %iv
  %2 = load i32, i32* %arrayidx2
  %add2 = add nsw i32 %2, %add1
  br label %if.end

if.end:
  %res = phi i32 [ %add2, %if.then ], [ %add1, %for.body ]
  %add3 = add nsw i32 %res, %0
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %for.end, label %for.body

for.end:
  ret i32 %add3
}