; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -loop-vectorize -prefer-predicate-over-epilogue=scalar-epilogue -mtriple aarch64-unknown-linux-gnu \
; RUN: -mattr=+sve -force-vector-interleave=1 -force-vector-width=4 -prefer-inloop-reductions -S | FileCheck %s

; Loop-vectorizer test for conditional floating-point reductions. The command
; line above targets AArch64 with SVE, forces a vector width of 4 with no
; interleaving, keeps a scalar epilogue, and asks for in-loop reductions. Both
; loops carry the scalable-vectorization loop metadata defined at the end of
; the file, and the expected output uses <vscale x 4 x float> vectors.

; Conditional fadd reduction: the accumulator starts at 1.0 and a[i] is added
; only on iterations where cond[i] compares unordered-or-not-equal to 2.0.
; Expected vector body: a masked load of a[], a select that substitutes zero in
; the inactive lanes, an llvm.vector.reduce.fadd of that vector, and a scalar
; fadd of the reduced value into the loop-carried float phi.
define float @cond_fadd(float* noalias nocapture readonly %a, float* noalias nocapture readonly %cond, i64 %N){
; CHECK-LABEL: @cond_fadd(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[TMP14:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[COND:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <vscale x 4 x float>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* [[TMP7]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 2.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[TMP9]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <vscale x 4 x float>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP8]], <vscale x 4 x float> poison)
; CHECK-NEXT: [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP8]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> zeroinitializer
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fadd.nxv4f32(float -0.000000e+00, <vscale x 4 x float> [[TMP12]])
; CHECK-NEXT: [[TMP14]] = fadd fast float [[TMP13]], [[VEC_PHI]]
; CHECK-NEXT: [[TMP15:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP16:%.*]] = mul i64 [[TMP15]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP16]]
; CHECK-NEXT: [[TMP17:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP17]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[INDVARS:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[INDVARS_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ], [ [[RES:%.*]], [[FOR_INC]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[COND]], i64 [[INDVARS]]
; CHECK-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP18]], 2.000000e+00
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK: if.then:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[INDVARS]]
; CHECK-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[FADD:%.*]] = fadd fast float [[RDX]], [[TMP19]]
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[RES]] = phi float [ [[FADD]], [[IF_THEN]] ], [ [[RDX]], [[FOR_BODY]] ]
; CHECK-NEXT: [[INDVARS_NEXT]] = add nuw nsw i64 [[INDVARS]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[INDVARS_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP2:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[TMP14]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret float [[RES_LCSSA]]
;
entry:
  br label %for.body

for.body:
  ; %indvars counts iterations from 0; %rdx is the running sum, seeded with 1.0.
  %indvars = phi i64 [ 0, %entry ], [ %indvars.next, %for.inc ]
  %rdx = phi float [ 1.000000e+00, %entry ], [ %res, %for.inc ]
  %arrayidx = getelementptr inbounds float, float* %cond, i64 %indvars
  %0 = load float, float* %arrayidx
  ; Guard: only accumulate when cond[i] is unordered with or not equal to 2.0.
  %tobool = fcmp une float %0, 2.000000e+00
  br i1 %tobool, label %if.then, label %for.inc

if.then:
  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %indvars
  %1 = load float, float* %arrayidx2
  %fadd = fadd fast float %rdx, %1
  br label %for.inc

for.inc:
  ; Merge the updated sum (guard taken) with the unchanged sum (guard skipped).
  %res = phi float [ %fadd, %if.then ], [ %rdx, %for.body ]
  %indvars.next = add nuw nsw i64 %indvars, 1
  %exitcond.not = icmp eq i64 %indvars.next, %N
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %res
}

; Conditional compare-and-select reduction: the accumulator starts at 1.0 and,
; on iterations where cond[i] compares unordered-or-not-equal to 3.0, becomes
; the accumulator if it is ordered-less-than a[i] and a[i] otherwise. The
; expected vector body reduces with llvm.vector.reduce.fmin and then combines
; the result with the loop-carried phi through an fcmp olt + select pair.
; NOTE(review): the expected select fills inactive lanes with
; 0xFFF0000000000000 (-Inf). That is not a neutral value for a minimum, so this
; presumably records the vectorizer's output at generation time rather than an
; intended identity -- confirm before regenerating or relying on this value.
define float @cond_cmp_sel(float* noalias %a, float* noalias %cond, i64 %N) {
; CHECK-LABEL: @cond_cmp_sel(
; CHECK-NEXT: entry:
; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP1:%.*]] = mul i64 [[TMP0]], 4
; CHECK-NEXT: [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], [[TMP1]]
; CHECK-NEXT: br i1 [[MIN_ITERS_CHECK]], label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP3:%.*]] = mul i64 [[TMP2]], 4
; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 [[N]], [[TMP3]]
; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 [[N]], [[N_MOD_VF]]
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi float [ 1.000000e+00, [[VECTOR_PH]] ], [ [[RDX_MINMAX_SELECT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[TMP4:%.*]] = add i64 [[INDEX]], 0
; CHECK-NEXT: [[TMP5:%.*]] = getelementptr inbounds float, float* [[COND:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds float, float* [[TMP5]], i32 0
; CHECK-NEXT: [[TMP7:%.*]] = bitcast float* [[TMP6]] to <vscale x 4 x float>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <vscale x 4 x float>, <vscale x 4 x float>* [[TMP7]], align 4
; CHECK-NEXT: [[TMP8:%.*]] = fcmp une <vscale x 4 x float> [[WIDE_LOAD]], shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 3.000000e+00, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP9:%.*]] = getelementptr float, float* [[A:%.*]], i64 [[TMP4]]
; CHECK-NEXT: [[TMP10:%.*]] = getelementptr float, float* [[TMP9]], i32 0
; CHECK-NEXT: [[TMP11:%.*]] = bitcast float* [[TMP10]] to <vscale x 4 x float>*
; CHECK-NEXT: [[WIDE_MASKED_LOAD:%.*]] = call <vscale x 4 x float> @llvm.masked.load.nxv4f32.p0nxv4f32(<vscale x 4 x float>* [[TMP11]], i32 4, <vscale x 4 x i1> [[TMP8]], <vscale x 4 x float> poison)
; CHECK-NEXT: [[TMP12:%.*]] = select fast <vscale x 4 x i1> [[TMP8]], <vscale x 4 x float> [[WIDE_MASKED_LOAD]], <vscale x 4 x float> shufflevector (<vscale x 4 x float> insertelement (<vscale x 4 x float> poison, float 0xFFF0000000000000, i32 0), <vscale x 4 x float> poison, <vscale x 4 x i32> zeroinitializer)
; CHECK-NEXT: [[TMP13:%.*]] = call fast float @llvm.vector.reduce.fmin.nxv4f32(<vscale x 4 x float> [[TMP12]])
; CHECK-NEXT: [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt float [[TMP13]], [[VEC_PHI]]
; CHECK-NEXT: [[RDX_MINMAX_SELECT]] = select fast i1 [[RDX_MINMAX_CMP]], float [[TMP13]], float [[VEC_PHI]]
; CHECK-NEXT: [[TMP14:%.*]] = call i64 @llvm.vscale.i64()
; CHECK-NEXT: [[TMP15:%.*]] = mul i64 [[TMP14]], 4
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP15]]
; CHECK-NEXT: [[TMP16:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
; CHECK-NEXT: br i1 [[TMP16]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP4:![0-9]+]]
; CHECK: middle.block:
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 [[N]], [[N_VEC]]
; CHECK-NEXT: br i1 [[CMP_N]], label [[FOR_END:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ [[N_VEC]], [[MIDDLE_BLOCK]] ], [ 0, [[ENTRY:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ 1.000000e+00, [[ENTRY]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[FOR_BODY:%.*]]
; CHECK: for.body:
; CHECK-NEXT: [[IV:%.*]] = phi i64 [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], [[FOR_INC:%.*]] ]
; CHECK-NEXT: [[RDX:%.*]] = phi float [ [[RES:%.*]], [[FOR_INC]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[COND]], i64 [[IV]]
; CHECK-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4
; CHECK-NEXT: [[TOBOOL:%.*]] = fcmp une float [[TMP17]], 3.000000e+00
; CHECK-NEXT: br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[FOR_INC]]
; CHECK: if.then:
; CHECK-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[A]], i64 [[IV]]
; CHECK-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4
; CHECK-NEXT: [[FCMP:%.*]] = fcmp fast olt float [[RDX]], [[TMP18]]
; CHECK-NEXT: [[FSEL:%.*]] = select fast i1 [[FCMP]], float [[RDX]], float [[TMP18]]
; CHECK-NEXT: br label [[FOR_INC]]
; CHECK: for.inc:
; CHECK-NEXT: [[RES]] = phi float [ [[RDX]], [[FOR_BODY]] ], [ [[FSEL]], [[IF_THEN]] ]
; CHECK-NEXT: [[IV_NEXT]] = add i64 [[IV]], 1
; CHECK-NEXT: [[EXITCOND_NOT:%.*]] = icmp eq i64 [[IV_NEXT]], [[N]]
; CHECK-NEXT: br i1 [[EXITCOND_NOT]], label [[FOR_END]], label [[FOR_BODY]], !llvm.loop [[LOOP5:![0-9]+]]
; CHECK: for.end:
; CHECK-NEXT: [[RES_LCSSA:%.*]] = phi float [ [[RES]], [[FOR_INC]] ], [ [[RDX_MINMAX_SELECT]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret float [[RES_LCSSA]]
;
entry:
  br label %for.body

for.body:
  ; %iv counts iterations from 0; %rdx is the running result, seeded with 1.0.
  ; The phi operands are listed back-edge first here, unlike @cond_fadd.
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %for.inc ]
  %rdx = phi float [ %res, %for.inc ], [ 1.000000e+00, %entry ]
  %arrayidx = getelementptr inbounds float, float* %cond, i64 %iv
  %0 = load float, float* %arrayidx
  ; Guard: only update when cond[i] is unordered with or not equal to 3.0.
  %tobool = fcmp une float %0, 3.000000e+00
  br i1 %tobool, label %if.then, label %for.inc

if.then:
  %arrayidx2 = getelementptr inbounds float, float* %a, i64 %iv
  %1 = load float, float* %arrayidx2
  ; Keep %rdx when it is ordered-less-than a[i]; otherwise take a[i].
  %fcmp = fcmp fast olt float %rdx, %1
  %fsel = select fast i1 %fcmp, float %rdx, float %1
  br label %for.inc

for.inc:
  ; Merge the unchanged value (guard skipped) with the selected one (guard taken).
  %res = phi float [ %rdx, %for.body ], [ %fsel, %if.then ]
  %iv.next = add i64 %iv, 1
  %exitcond.not = icmp eq i64 %iv.next, %N
  br i1 %exitcond.not, label %for.end, label %for.body, !llvm.loop !0

for.end:
  ret float %res
}

; Loop metadata attached to the back-edge branch of both loops above; it sets
; llvm.loop.vectorize.scalable.enable to true.
!0 = distinct !{!0, !1}
!1 = !{!"llvm.loop.vectorize.scalable.enable", i1 true}