; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -passes=loop-vectorize -force-vector-width=4 -S | FileCheck %s

; The function finds the smallest value from a float vector.
; The compare carries the instruction flag `fcmp nnan`; the select carries no
; fast-math flags and the function has no attribute group. The assertions
; below expect the loop to stay scalar, i.e. `nnan` on the fcmp alone does not
; enable vectorization of this min reduction.
; NOTE(review): this comment used to say vectorization "is enabled" by the
; flag, which contradicts the autogenerated assertions - presumably the
; expectation changed with the move to IR-level FMF; confirm.

define float @minloop(float* nocapture readonly %arg) {
; CHECK-LABEL: @minloop(
; CHECK-NEXT: top:
; CHECK-NEXT: [[T:%.*]] = load float, float* [[ARG:%.*]], align 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[TOP:%.*]] ]
; CHECK-NEXT: [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[TOP]] ]
; CHECK-NEXT: [[T3:%.*]] = getelementptr float, float* [[ARG]], i64 [[T1]]
; CHECK-NEXT: [[T4:%.*]] = load float, float* [[T3]], align 4
; CHECK-NEXT: [[T5:%.*]] = fcmp nnan olt float [[T2]], [[T4]]
; CHECK-NEXT: [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
; CHECK-NEXT: [[T7]] = add i64 [[T1]], 1
; CHECK-NEXT: [[T8:%.*]] = icmp eq i64 [[T7]], 65537
; CHECK-NEXT: br i1 [[T8]], label [[OUT:%.*]], label [[LOOP]]
; CHECK: out:
; CHECK-NEXT: [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ]
; CHECK-NEXT: ret float [[T6_LCSSA]]
;
top:
  %t = load float, float* %arg ; element 0 seeds the running minimum
  br label %loop

loop: ; preds = %loop, %top
  %t1 = phi i64 [ %t7, %loop ], [ 1, %top ] ; index, starts at 1
  %t2 = phi float [ %t6, %loop ], [ %t, %top ] ; running minimum
  %t3 = getelementptr float, float* %arg, i64 %t1
  %t4 = load float, float* %t3, align 4
  %t5 = fcmp nnan olt float %t2, %t4 ; only the compare has a fast-math flag
  %t6 = select i1 %t5, float %t2, float %t4 ; keeps the smaller of %t2 and %t4
  %t7 = add i64 %t1, 1
  %t8 = icmp eq i64 %t7, 65537 ; exit once the next index would be 65537
  br i1 %t8, label %out, label %loop

out: ; preds = %loop
  ret float %t6
}

; Check if vectorization is still enabled by function attribute.
; @minloopattr runs a float min reduction with a plain `fcmp olt` + `select`
; (no instruction-level fast-math flags). The function uses attribute group #0,
; which sets both "no-nans-fp-math" and "no-signed-zeros-fp-math" to "true".
; The assertions expect a vectorized loop: a <4 x float> reduction phi in
; vector.body, a call to @llvm.vector.reduce.fmin.v4f32 in middle.block, and
; the original scalar loop kept behind scalar.ph.

define float @minloopattr(float* nocapture readonly %arg) #0 {
; CHECK-LABEL: @minloopattr(
; CHECK-NEXT: top:
; CHECK-NEXT: [[T:%.*]] = load float, float* [[ARG:%.*]], align 4
; CHECK-NEXT: br i1 false, label [[SCALAR_PH:%.*]], label [[VECTOR_PH:%.*]]
; CHECK: vector.ph:
; CHECK-NEXT: [[MINMAX_IDENT_SPLATINSERT:%.*]] = insertelement <4 x float> poison, float [[T]], i32 0
; CHECK-NEXT: [[MINMAX_IDENT_SPLAT:%.*]] = shufflevector <4 x float> [[MINMAX_IDENT_SPLATINSERT]], <4 x float> poison, <4 x i32> zeroinitializer
; CHECK-NEXT: br label [[VECTOR_BODY:%.*]]
; CHECK: vector.body:
; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <4 x float> [ [[MINMAX_IDENT_SPLAT]], [[VECTOR_PH]] ], [ [[TMP5:%.*]], [[VECTOR_BODY]] ]
; CHECK-NEXT: [[OFFSET_IDX:%.*]] = add i64 1, [[INDEX]]
; CHECK-NEXT: [[TMP0:%.*]] = add i64 [[OFFSET_IDX]], 0
; CHECK-NEXT: [[TMP1:%.*]] = getelementptr float, float* [[ARG]], i64 [[TMP0]]
; CHECK-NEXT: [[TMP2:%.*]] = getelementptr float, float* [[TMP1]], i32 0
; CHECK-NEXT: [[TMP3:%.*]] = bitcast float* [[TMP2]] to <4 x float>*
; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x float>, <4 x float>* [[TMP3]], align 4
; CHECK-NEXT: [[TMP4:%.*]] = fcmp olt <4 x float> [[VEC_PHI]], [[WIDE_LOAD]]
; CHECK-NEXT: [[TMP5]] = select <4 x i1> [[TMP4]], <4 x float> [[VEC_PHI]], <4 x float> [[WIDE_LOAD]]
; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
; CHECK-NEXT: [[TMP6:%.*]] = icmp eq i64 [[INDEX_NEXT]], 65536
; CHECK-NEXT: br i1 [[TMP6]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], [[LOOP0:!llvm.loop !.*]]
; CHECK: middle.block:
; CHECK-NEXT: [[TMP7:%.*]] = call float @llvm.vector.reduce.fmin.v4f32(<4 x float> [[TMP5]])
; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 65536, 65536
; CHECK-NEXT: br i1 [[CMP_N]], label [[OUT:%.*]], label [[SCALAR_PH]]
; CHECK: scalar.ph:
; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i64 [ 65537, [[MIDDLE_BLOCK]] ], [ 1, [[TOP:%.*]] ]
; CHECK-NEXT: [[BC_MERGE_RDX:%.*]] = phi float [ [[T]], [[TOP]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ [[BC_RESUME_VAL]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[BC_MERGE_RDX]], [[SCALAR_PH]] ]
; CHECK-NEXT: [[T3:%.*]] = getelementptr float, float* [[ARG]], i64 [[T1]]
; CHECK-NEXT: [[T4:%.*]] = load float, float* [[T3]], align 4
; CHECK-NEXT: [[T5:%.*]] = fcmp olt float [[T2]], [[T4]]
; CHECK-NEXT: [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
; CHECK-NEXT: [[T7]] = add i64 [[T1]], 1
; CHECK-NEXT: [[T8:%.*]] = icmp eq i64 [[T7]], 65537
; CHECK-NEXT: br i1 [[T8]], label [[OUT]], label [[LOOP]], [[LOOP2:!llvm.loop !.*]]
; CHECK: out:
; CHECK-NEXT: [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ], [ [[TMP7]], [[MIDDLE_BLOCK]] ]
; CHECK-NEXT: ret float [[T6_LCSSA]]
;
top:
  %t = load float, float* %arg ; element 0 seeds the running minimum
  br label %loop

loop: ; preds = %loop, %top
  %t1 = phi i64 [ %t7, %loop ], [ 1, %top ] ; index, starts at 1
  %t2 = phi float [ %t6, %loop ], [ %t, %top ] ; running minimum
  %t3 = getelementptr float, float* %arg, i64 %t1
  %t4 = load float, float* %t3, align 4
  %t5 = fcmp olt float %t2, %t4 ; no fast-math flags on the instruction itself
  %t6 = select i1 %t5, float %t2, float %t4 ; keeps the smaller of %t2 and %t4
  %t7 = add i64 %t1, 1
  %t8 = icmp eq i64 %t7, 65537 ; exit once the next index would be 65537
  br i1 %t8, label %out, label %loop

out: ; preds = %loop
  ret float %t6
}

; Check if vectorization is prevented without the flag or attribute.
; @minloopnovec is the same min-reduction loop with neither fast-math flags on
; the fcmp/select nor a function attribute group. The assertions expect the
; scalar loop to be left unchanged.

define float @minloopnovec(float* nocapture readonly %arg) {
; CHECK-LABEL: @minloopnovec(
; CHECK-NEXT: top:
; CHECK-NEXT: [[T:%.*]] = load float, float* [[ARG:%.*]], align 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[TOP:%.*]] ]
; CHECK-NEXT: [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[TOP]] ]
; CHECK-NEXT: [[T3:%.*]] = getelementptr float, float* [[ARG]], i64 [[T1]]
; CHECK-NEXT: [[T4:%.*]] = load float, float* [[T3]], align 4
; CHECK-NEXT: [[T5:%.*]] = fcmp olt float [[T2]], [[T4]]
; CHECK-NEXT: [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
; CHECK-NEXT: [[T7]] = add i64 [[T1]], 1
; CHECK-NEXT: [[T8:%.*]] = icmp eq i64 [[T7]], 65537
; CHECK-NEXT: br i1 [[T8]], label [[OUT:%.*]], label [[LOOP]]
; CHECK: out:
; CHECK-NEXT: [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ]
; CHECK-NEXT: ret float [[T6_LCSSA]]
;
top:
  %t = load float, float* %arg ; element 0 seeds the running minimum
  br label %loop

loop: ; preds = %loop, %top
  %t1 = phi i64 [ %t7, %loop ], [ 1, %top ] ; index, starts at 1
  %t2 = phi float [ %t6, %loop ], [ %t, %top ] ; running minimum
  %t3 = getelementptr float, float* %arg, i64 %t1
  %t4 = load float, float* %t3, align 4
  %t5 = fcmp olt float %t2, %t4 ; no fast-math flags
  %t6 = select i1 %t5, float %t2, float %t4 ; keeps the smaller of %t2 and %t4
  %t7 = add i64 %t1, 1
  %t8 = icmp eq i64 %t7, 65537 ; exit once the next index would be 65537
  br i1 %t8, label %out, label %loop

out: ; preds = %loop
  ret float %t6
}

; This test is checking that we don't vectorize when only one of the required attributes is set.
; Note that this test should not vectorize even after switching to IR-level FMF.
; @minloopmissingnsz uses attribute group #1, which sets only
; "no-nans-fp-math"="true" (no "no-signed-zeros-fp-math"). The fcmp/select
; carry no fast-math flags. The assertions expect the scalar loop unchanged.
define float @minloopmissingnsz(float* nocapture readonly %arg) #1 {
; CHECK-LABEL: @minloopmissingnsz(
; CHECK-NEXT: top:
; CHECK-NEXT: [[T:%.*]] = load float, float* [[ARG:%.*]], align 4
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[T1:%.*]] = phi i64 [ [[T7:%.*]], [[LOOP]] ], [ 1, [[TOP:%.*]] ]
; CHECK-NEXT: [[T2:%.*]] = phi float [ [[T6:%.*]], [[LOOP]] ], [ [[T]], [[TOP]] ]
; CHECK-NEXT: [[T3:%.*]] = getelementptr float, float* [[ARG]], i64 [[T1]]
; CHECK-NEXT: [[T4:%.*]] = load float, float* [[T3]], align 4
; CHECK-NEXT: [[T5:%.*]] = fcmp olt float [[T2]], [[T4]]
; CHECK-NEXT: [[T6]] = select i1 [[T5]], float [[T2]], float [[T4]]
; CHECK-NEXT: [[T7]] = add i64 [[T1]], 1
; CHECK-NEXT: [[T8:%.*]] = icmp eq i64 [[T7]], 65537
; CHECK-NEXT: br i1 [[T8]], label [[OUT:%.*]], label [[LOOP]]
; CHECK: out:
; CHECK-NEXT: [[T6_LCSSA:%.*]] = phi float [ [[T6]], [[LOOP]] ]
; CHECK-NEXT: ret float [[T6_LCSSA]]
;
top:
  %t = load float, float* %arg ; element 0 seeds the running minimum
  br label %loop

loop: ; preds = %loop, %top
  %t1 = phi i64 [ %t7, %loop ], [ 1, %top ] ; index, starts at 1
  %t2 = phi float [ %t6, %loop ], [ %t, %top ] ; running minimum
  %t3 = getelementptr float, float* %arg, i64 %t1
  %t4 = load float, float* %t3, align 4
  %t5 = fcmp olt float %t2, %t4 ; no fast-math flags
  %t6 = select i1 %t5, float %t2, float %t4 ; keeps the smaller of %t2 and %t4
  %t7 = add i64 %t1, 1
  %t8 = icmp eq i64 %t7, 65537 ; exit once the next index would be 65537
  br i1 %t8, label %out, label %loop

out: ; preds = %loop
  ret float %t6
}

; This would assert on FMF propagation.
; @not_a_min_max contains a float select that carries fast-math flags but whose
; condition is an integer compare (`icmp eq`), so it is not a compare/select
; min-max pattern. The loop's back-branch condition is the constant `false`
; (never taken to %end). The assertions expect the loop to be left unchanged.

define void @not_a_min_max() {
; CHECK-LABEL: @not_a_min_max(
; CHECK-NEXT: entry:
; CHECK-NEXT: br label [[LOOP:%.*]]
; CHECK: loop:
; CHECK-NEXT: [[F9_S0_V0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[ADD:%.*]], [[LOOP]] ]
; CHECK-NEXT: [[T14:%.*]] = icmp eq i32 [[F9_S0_V0]], 5
; CHECK-NEXT: [[T15:%.*]] = select reassoc nnan ninf nsz contract afn i1 [[T14]], float 0x36A0000000000000, float 0.000000e+00
; CHECK-NEXT: [[ADD]] = add nuw nsw i32 [[F9_S0_V0]], 1
; CHECK-NEXT: br i1 false, label [[END:%.*]], label [[LOOP]]
; CHECK: end:
; CHECK-NEXT: ret void
;
entry:
  br label %loop

loop:
  %f9.s0.v0 = phi i32 [ 0, %entry ], [ %add, %loop ] ; integer induction variable
  %t14 = icmp eq i32 %f9.s0.v0, 5 ; integer compare, not an fcmp
  %t15 = select reassoc nnan ninf nsz contract afn i1 %t14, float 0x36A0000000000000, float 0.0 ; FMF-carrying select; result is unused
  %add = add nuw nsw i32 %f9.s0.v0, 1
  br i1 false, label %end, label %loop

end:
  ret void
}

; #0: both no-NaNs and no-signed-zeros function attributes (used by @minloopattr).
attributes #0 = { "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" }
; #1: no-NaNs only (used by @minloopmissingnsz).
attributes #1 = { "no-nans-fp-math"="true" }