; RUN: opt %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s

; Make sure that integer poison-generating flags (i.e., nuw/nsw, exact and inbounds)
; are dropped from instructions in blocks that need predication and are linearized
; and masked after vectorization. We only drop flags from scalar instructions that
; contribute to the address computation of a masked vector load/store. After
; linearizing the control flow and removing their guarding condition, these
; instructions could generate a poison value which would be used as base address of
; the masked vector load/store (see PR52111). For gather/scatter cases,
; poison-generating flags can be preserved since poison addresses in the vector GEP
; reaching the gather/scatter instruction will be masked-out by the gather/scatter
; instruction itself and won't be used.
; We need AVX512 target features for the loop to be vectorized with masks instead of
; predicates.

target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-pc-linux-gnu"

; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
; Test for PR52111.
define void @drop_scalar_nuw_nsw(float* noalias nocapture readonly %input,
                                 float* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_nuw_nsw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, float* [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, float* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP9]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i27 = sub nuw nsw i64 %iv, 1
  %i29 = getelementptr inbounds float, float* %input, i64 %i27
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
; In this case, 'sub' and 'getelementptr' are not guarded by the predicate.
define void @drop_nonpred_scalar_nuw_nsw(float* noalias nocapture readonly %input,
                                         float* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_nonpred_scalar_nuw_nsw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, float* [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, float* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP9]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i27 = sub i64 %iv, 1
  %i29 = getelementptr float, float* %input, i64 %i27
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Preserve poison-generating flags from vector 'sub', 'mul' and 'getelementptr' feeding a masked gather.
define void @preserve_vector_nuw_nsw(float* noalias nocapture readonly %input,
                                     float* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_vector_nuw_nsw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw nsw <4 x i64> [[TMP5]], <i64 2, i64 2, i64 2, i64 2>
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[INPUT:%.*]], <4 x i64> [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> [[TMP7]], i32 4, <4 x i1> [[TMP8]], <4 x float> undef), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i27 = sub nuw nsw i64 %iv, 1
  %i28 = mul nuw nsw i64 %i27, 2
  %i29 = getelementptr inbounds float, float* %input, i64 %i28
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Drop poison-generating flags from vector 'sub' and 'gep' feeding a masked load.
define void @drop_vector_nuw_nsw(float* noalias nocapture readonly %input,
                                 float* %output, float** noalias %ptrs) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_vector_nuw_nsw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float*, float** [[PTRS:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP6:%.*]] = sub <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr float, float* [[INPUT:%.*]], <4 x i64> [[TMP6]]
; CHECK:         [[TMP10:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x float*> [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr float, float* [[TMP11]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP12]] to <4 x float>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP13]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  %gep = getelementptr inbounds float*, float** %ptrs, i64 %iv
  %i27 = sub nuw nsw i64 %iv, 1
  %i29 = getelementptr inbounds float, float* %input, i64 %i27
  store float* %i29, float** %gep
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Preserve poison-generating flags from 'sub', which is not contributing to any address computation
; of any masked load/store/gather/scatter.
define void @preserve_nuw_nsw_no_addr(i64* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_nuw_nsw_no_addr(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[OUTPUT:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i64* [[TMP8]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[PREDPHI]], <4 x i64>* [[TMP9]], align 4
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i27 = sub nuw nsw i64 %iv, 1
  br label %if.end

if.end:
  %i34 = phi i64 [ 0, %loop.header ], [ %i27, %if.then ]
  %i35 = getelementptr inbounds i64, i64* %output, i64 %iv
  store i64 %i34, i64* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Drop poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked load.
define void @drop_scalar_exact(float* noalias nocapture readonly %input,
                               float* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_exact(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = sdiv i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, float* [[INPUT:%.*]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr float, float* [[TMP9]], i32 0
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast float* [[TMP11]] to <4 x float>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP12]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i7 = icmp ne i64 %iv, 0
  %i8 = and i64 %iv, 1
  %i9 = icmp eq i64 %i8, 0
  %i10 = and i1 %i7, %i9
  br i1 %i10, label %if.end, label %if.then

if.then:
  %i26 = sdiv exact i64 %iv, 1
  %i29 = getelementptr inbounds float, float* %input, i64 %i26
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Preserve poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked gather.
define void @preserve_vector_exact_no_addr(float* noalias nocapture readonly %input,
                                           float* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_vector_exact_no_addr(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[INPUT:%.*]], <4 x i64> [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> [[TMP9]], i32 4, <4 x i1> [[TMP10]], <4 x float> undef), !invariant.load !0
;
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i7 = icmp ne i64 %iv, 0
  %i8 = and i64 %iv, 1
  %i9 = icmp eq i64 %i8, 0
  %i10 = and i1 %i7, %i9
  br i1 %i10, label %if.end, label %if.then

if.then:
  %i26 = sdiv exact i64 %iv, 2
  %i29 = getelementptr inbounds float, float* %input, i64 %i26
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

; Preserve poison-generating flags from 'sdiv', which is not contributing to any address computation
; of any masked load/store/gather/scatter.
define void @preserve_exact_no_addr(i64* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_exact_no_addr(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[OUTPUT:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i64* [[TMP8]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[PREDPHI]], <4 x i64>* [[TMP9]], align 4
entry:
  br label %loop.header

loop.header:
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i27 = sdiv exact i64 %iv, 2
  br label %if.end

if.end:
  %i34 = phi i64 [ 0, %loop.header ], [ %i27, %if.then ]
  %i35 = getelementptr inbounds i64, i64* %output, i64 %iv
  store i64 %i34, i64* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
; Make sure we don't vectorize a loop with a phi feeding a poison value to
; a masked load/gather.
define void @dont_vectorize_poison_phi(float* noalias nocapture readonly %input,
; CHECK-LABEL: @dont_vectorize_poison_phi(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[POISON:%.*]] = phi i64 [ poison, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[IF_END:%.*]] ]
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_INC]], [[IF_END]] ]
; CHECK-NEXT:    [[I23:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT:    br i1 [[I23]], label [[IF_END]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[I29:%.*]] = getelementptr inbounds float, float* [[INPUT:%.*]], i64 [[POISON]]
; CHECK-NEXT:    [[I30:%.*]] = load float, float* [[I29]], align 4, !invariant.load !0
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    [[I34:%.*]] = phi float [ 0.000000e+00, [[LOOP_HEADER]] ], [ [[I30]], [[IF_THEN]] ]
; CHECK-NEXT:    [[I35:%.*]] = getelementptr inbounds float, float* [[OUTPUT:%.*]], i64 [[IV]]
; CHECK-NEXT:    store float [[I34]], float* [[I35]], align 4
; CHECK-NEXT:    [[IV_INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_INC]], 4
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP_EXIT:%.*]], label [[LOOP_HEADER]]
; CHECK:       loop.exit:
; CHECK-NEXT:    ret void
;
                                       float* %output) local_unnamed_addr #0 {
entry:
  br label %loop.header

loop.header:
  %poison = phi i64 [ poison, %entry ], [ %iv.inc, %if.end ]
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  %i29 = getelementptr inbounds float, float* %input, i64 %poison
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}

attributes #0 = { noinline nounwind uwtable "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }

!0 = !{}