1; RUN: opt %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s
2
3; Make sure that integer poison-generating flags (i.e., nuw/nsw, exact and inbounds)
4; are dropped from instructions in blocks that need predication and are linearized
5; and masked after vectorization. We only drop flags from scalar instructions that
6; contribute to the address computation of a masked vector load/store. After
7; linearizing the control flow and removing their guarding condition, these
8; instructions could generate a poison value which would be used as base address of
9; the masked vector load/store (see PR52111). For gather/scatter cases,
; poison-generating flags can be preserved since poison addresses in the vector GEP
11; reaching the gather/scatter instruction will be masked-out by the gather/scatter
12; instruction itself and won't be used.
13; We need AVX512 target features for the loop to be vectorized with masks instead of
14; predicates.
15
16target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
17target triple = "x86_64-pc-linux-gnu"
18
19; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
20; Test for PR52111.
define void @drop_scalar_nuw_nsw(float* noalias nocapture readonly %input,
                                 float* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_nuw_nsw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, float* [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, float* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP9]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  ; %iv counts 0..3; %i23 guards if.then so the iv==0 iteration skips the load.
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  ; Predicated address computation: after linearization, iv==0 would make this
  ; sub wrap (poison under nuw/nsw) and feed the masked load's scalar base
  ; address, so the vectorizer must drop nuw/nsw/inbounds here (checked above).
  %i27 = sub nuw nsw i64 %iv, 1
  %i29 = getelementptr inbounds float, float* %input, i64 %i27
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  ; Merge the conditionally-loaded value with 0.0 and store unconditionally.
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
60
61; Drop poison-generating flags from 'sub' and 'getelementptr' feeding a masked load.
62; In this case, 'sub' and 'getelementptr' are not guarded by the predicate.
define void @drop_nonpred_scalar_nuw_nsw(float* noalias nocapture readonly %input,
                                         float* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_nonpred_scalar_nuw_nsw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP5:%.*]] = sub i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP6:%.*]] = getelementptr float, float* [[INPUT:%.*]], i64 [[TMP5]]
; CHECK-NEXT:    [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr float, float* [[TMP6]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast float* [[TMP8]] to <4 x float>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP9]], i32 4, <4 x i1> [[TMP7]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  ; Unlike @drop_scalar_nuw_nsw, the address computation lives in the header,
  ; i.e. outside the predicated block; only the load itself is guarded.
  ; NOTE(review): this sub/gep carry no nuw/nsw/inbounds in the input, yet the
  ; function comment says the flags are dropped — presumably the point is that
  ; the output stays flagless; confirm against the upstream test.
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i27 = sub i64 %iv, 1
  %i29 = getelementptr float, float* %input, i64 %i27
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  ; Only the load is predicated; it becomes a masked vector load.
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  ; Merge the conditionally-loaded value with 0.0 and store unconditionally.
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
102
103; Preserve poison-generating flags from vector 'sub', 'mul' and 'getelementptr' feeding a masked gather.
define void @preserve_vector_nuw_nsw(float* noalias nocapture readonly %input,
                                     float* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_vector_nuw_nsw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT:    [[TMP6:%.*]] = mul nuw nsw <4 x i64> [[TMP5]], <i64 2, i64 2, i64 2, i64 2>
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds float, float* [[INPUT:%.*]], <4 x i64> [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> [[TMP7]], i32 4, <4 x i1> [[TMP8]], <4 x float> undef), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  ; %iv counts 0..3; %i23 guards if.then so the iv==0 iteration skips the load.
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  ; The non-unit stride (mul by 2) forces a gather, and all addresses stay in
  ; the vector GEP feeding it; poison lanes are masked out by the gather, so
  ; nuw/nsw/inbounds may be preserved (checked above).
  %i27 = sub nuw nsw i64 %iv, 1
  %i28 = mul nuw nsw i64 %i27, 2
  %i29 = getelementptr inbounds float, float* %input, i64 %i28
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  ; Merge the conditionally-loaded value with 0.0 and store unconditionally.
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
143
144; Drop poison-generating flags from vector 'sub' and 'gep' feeding a masked load.
define void @drop_vector_nuw_nsw(float* noalias nocapture readonly %input,
                                 float* %output, float** noalias %ptrs) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_vector_nuw_nsw(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = getelementptr inbounds float*, float** [[PTRS:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP6:%.*]] = sub <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr float, float* [[INPUT:%.*]], <4 x i64> [[TMP6]]
; CHECK:         [[TMP10:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP11:%.*]] = extractelement <4 x float*> [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP12:%.*]] = getelementptr float, float* [[TMP11]], i32 0
; CHECK-NEXT:    [[TMP13:%.*]] = bitcast float* [[TMP12]] to <4 x float>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP13]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  ; The address %i29 is computed and stored to %ptrs unconditionally (forcing
  ; vector sub/gep), but it also feeds the predicated load below, whose base
  ; address is extracted from lane 0 of the vector GEP — so the nuw/nsw and
  ; inbounds flags must still be dropped (checked above).
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  %gep = getelementptr inbounds float*, float** %ptrs, i64 %iv
  %i27 = sub nuw nsw i64 %iv, 1
  %i29 = getelementptr inbounds float, float* %input, i64 %i27
  store float* %i29, float** %gep
  br i1 %i23, label %if.end, label %if.then

if.then:
  ; Predicated load from the unconditionally-computed address.
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  ; Merge the conditionally-loaded value with 0.0 and store unconditionally.
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
188
189; Preserve poison-generating flags from 'sub', which is not contributing to any address computation
190; of any masked load/store/gather/scatter.
define void @preserve_nuw_nsw_no_addr(i64* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_nuw_nsw_no_addr(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sub nuw nsw <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[OUTPUT:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i64* [[TMP8]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[PREDPHI]], <4 x i64>* [[TMP9]], align 4
entry:
  br label %loop.header

loop.header:
  ; %iv counts 0..3; %i23 guards if.then so the iv==0 iteration skips the sub.
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  ; The sub result is only a stored *value*, never a load/store address, so its
  ; poison lanes are discarded by the select (PREDPHI) and nuw/nsw is preserved.
  %i27 = sub nuw nsw i64 %iv, 1
  br label %if.end

if.end:
  ; Select 0 for the masked-off lane (iv == 0) and store the result.
  %i34 = phi i64 [ 0, %loop.header ], [ %i27, %if.then ]
  %i35 = getelementptr inbounds i64, i64* %output, i64 %iv
  store i64 %i34, i64* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
228
229; Drop poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked load.
define void @drop_scalar_exact(float* noalias nocapture readonly %input,
                               float* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @drop_scalar_exact(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = sdiv i64 [[TMP0]], 1
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr float, float* [[INPUT:%.*]], i64 [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[TMP11:%.*]] = getelementptr float, float* [[TMP9]], i32 0
; CHECK-NEXT:    [[TMP12:%.*]] = bitcast float* [[TMP11]] to <4 x float>*
; CHECK-NEXT:    [[WIDE_MASKED_LOAD:%.*]] = call <4 x float> @llvm.masked.load.v4f32.p0v4f32(<4 x float>* [[TMP12]], i32 4, <4 x i1> [[TMP10]], <4 x float> poison), !invariant.load !0
entry:
  br label %loop.header

loop.header:
  ; Predicate: skip if.then when iv != 0 and iv is even.
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i7 = icmp ne i64 %iv, 0
  %i8 = and i64 %iv, 1
  %i9 = icmp eq i64 %i8, 0
  %i10 = and i1 %i7, %i9
  br i1 %i10, label %if.end, label %if.then

if.then:
  ; Predicated sdiv feeding the masked load's scalar base address: the 'exact'
  ; flag (and the gep's inbounds) must be dropped after linearization (checked
  ; above), since a masked-off lane could make the division inexact → poison.
  %i26 = sdiv exact i64 %iv, 1
  %i29 = getelementptr inbounds float, float* %input, i64 %i26
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  ; Merge the conditionally-loaded value with 0.0 and store unconditionally.
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
275
276; Preserve poison-generating flags from 'sdiv' and 'getelementptr' feeding a masked gather.
define void @preserve_vector_exact_no_addr(float* noalias nocapture readonly %input,
                                           float* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_vector_exact_no_addr(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp ne <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = and <4 x i64> [[VEC_IND]], <i64 1, i64 1, i64 1, i64 1>
; CHECK-NEXT:    [[TMP6:%.*]] = icmp eq <4 x i64> [[TMP5]], zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = and <4 x i1> [[TMP4]], [[TMP6]]
; CHECK-NEXT:    [[TMP8:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
; CHECK-NEXT:    [[TMP9:%.*]] = getelementptr inbounds float, float* [[INPUT:%.*]], <4 x i64> [[TMP8]]
; CHECK-NEXT:    [[TMP10:%.*]] = xor <4 x i1> [[TMP7]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[WIDE_MASKED_GATHER:%.*]] = call <4 x float> @llvm.masked.gather.v4f32.v4p0f32(<4 x float*> [[TMP9]], i32 4, <4 x i1> [[TMP10]], <4 x float> undef), !invariant.load !0
;
entry:
  br label %loop.header

loop.header:
  ; Predicate: skip if.then when iv != 0 and iv is even.
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i7 = icmp ne i64 %iv, 0
  %i8 = and i64 %iv, 1
  %i9 = icmp eq i64 %i8, 0
  %i10 = and i1 %i7, %i9
  br i1 %i10, label %if.end, label %if.then

if.then:
  ; The divide-by-2 index forces a gather; poison addresses stay inside the
  ; vector GEP whose lanes are masked out, so 'exact' and 'inbounds' may be
  ; preserved (checked above).
  %i26 = sdiv exact i64 %iv, 2
  %i29 = getelementptr inbounds float, float* %input, i64 %i26
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  ; Merge the conditionally-loaded value with 0.0 and store unconditionally.
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
321
322; Preserve poison-generating flags from 'sdiv', which is not contributing to any address computation
323; of any masked load/store/gather/scatter.
define void @preserve_exact_no_addr(i64* %output) local_unnamed_addr #0 {
; CHECK-LABEL: @preserve_exact_no_addr(
; CHECK:       vector.body:
; CHECK-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, {{.*}} ]
; CHECK-NEXT:    [[VEC_IND:%.*]] = phi <4 x i64> [ <i64 0, i64 1, i64 2, i64 3>, {{.*}} ]
; CHECK-NEXT:    [[TMP0:%.*]] = add i64 [[INDEX]], 0
; CHECK:         [[TMP4:%.*]] = icmp eq <4 x i64> [[VEC_IND]], zeroinitializer
; CHECK-NEXT:    [[TMP5:%.*]] = sdiv exact <4 x i64> [[VEC_IND]], <i64 2, i64 2, i64 2, i64 2>
; CHECK-NEXT:    [[TMP6:%.*]] = xor <4 x i1> [[TMP4]], <i1 true, i1 true, i1 true, i1 true>
; CHECK-NEXT:    [[PREDPHI:%.*]] = select <4 x i1> [[TMP6]], <4 x i64> [[TMP5]], <4 x i64> zeroinitializer
; CHECK-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i64, i64* [[OUTPUT:%.*]], i64 [[TMP0]]
; CHECK-NEXT:    [[TMP8:%.*]] = getelementptr inbounds i64, i64* [[TMP7]], i32 0
; CHECK-NEXT:    [[TMP9:%.*]] = bitcast i64* [[TMP8]] to <4 x i64>*
; CHECK-NEXT:    store <4 x i64> [[PREDPHI]], <4 x i64>* [[TMP9]], align 4
entry:
  br label %loop.header

loop.header:
  ; %iv counts 0..3; %i23 guards if.then so the iv==0 iteration skips the sdiv.
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  ; The sdiv result is only a stored *value*, never an address, so its poison
  ; lanes are discarded by the select (PREDPHI) and 'exact' is preserved.
  %i27 = sdiv exact i64 %iv, 2
  br label %if.end

if.end:
  ; Select 0 for the masked-off lane (iv == 0) and store the result.
  %i34 = phi i64 [ 0, %loop.header ], [ %i27, %if.then ]
  %i35 = getelementptr inbounds i64, i64* %output, i64 %iv
  store i64 %i34, i64* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
361
362; Make sure we don't vectorize a loop with a phi feeding a poison value to
363; a masked load/gather.
define void @dont_vectorize_poison_phi(float* noalias nocapture readonly %input,
; CHECK-LABEL: @dont_vectorize_poison_phi(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    br label [[LOOP_HEADER:%.*]]
; CHECK:       loop.header:
; CHECK-NEXT:    [[POISON:%.*]] = phi i64 [ poison, [[ENTRY:%.*]] ], [ [[IV_INC:%.*]], [[IF_END:%.*]] ]
; CHECK-NEXT:    [[IV:%.*]] = phi i64 [ 0, [[ENTRY]] ], [ [[IV_INC]], [[IF_END]] ]
; CHECK-NEXT:    [[I23:%.*]] = icmp eq i64 [[IV]], 0
; CHECK-NEXT:    br i1 [[I23]], label [[IF_END]], label [[IF_THEN:%.*]]
; CHECK:       if.then:
; CHECK-NEXT:    [[I29:%.*]] = getelementptr inbounds float, float* [[INPUT:%.*]], i64 [[POISON]]
; CHECK-NEXT:    [[I30:%.*]] = load float, float* [[I29]], align 4, !invariant.load !0
; CHECK-NEXT:    br label [[IF_END]]
; CHECK:       if.end:
; CHECK-NEXT:    [[I34:%.*]] = phi float [ 0.000000e+00, [[LOOP_HEADER]] ], [ [[I30]], [[IF_THEN]] ]
; CHECK-NEXT:    [[I35:%.*]] = getelementptr inbounds float, float* [[OUTPUT:%.*]], i64 [[IV]]
; CHECK-NEXT:    store float [[I34]], float* [[I35]], align 4
; CHECK-NEXT:    [[IV_INC]] = add nuw nsw i64 [[IV]], 1
; CHECK-NEXT:    [[EXITCOND:%.*]] = icmp eq i64 [[IV_INC]], 4
; CHECK-NEXT:    br i1 [[EXITCOND]], label [[LOOP_EXIT:%.*]], label [[LOOP_HEADER]]
; CHECK:       loop.exit:
; CHECK-NEXT:    ret void
;
  float* %output) local_unnamed_addr #0 {
entry:
  br label %loop.header

loop.header:
  ; %poison carries a literal poison value on the first iteration; there is no
  ; flag to drop here, so the loop must be left scalar (the CHECK lines above
  ; match the unvectorized body verbatim).
  %poison = phi i64 [ poison, %entry ], [ %iv.inc, %if.end ]
  %iv = phi i64 [ 0, %entry ], [ %iv.inc, %if.end ]
  %i23 = icmp eq i64 %iv, 0
  br i1 %i23, label %if.end, label %if.then

if.then:
  ; The predicated load's address is directly based on the poison phi.
  %i29 = getelementptr inbounds float, float* %input, i64 %poison
  %i30 = load float, float* %i29, align 4, !invariant.load !0
  br label %if.end

if.end:
  ; Merge the conditionally-loaded value with 0.0 and store unconditionally.
  %i34 = phi float [ 0.000000e+00, %loop.header ], [ %i30, %if.then ]
  %i35 = getelementptr inbounds float, float* %output, i64 %iv
  store float %i34, float* %i35, align 4
  %iv.inc = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.inc, 4
  br i1 %exitcond, label %loop.exit, label %loop.header

loop.exit:
  ret void
}
413
414attributes #0 = { noinline nounwind uwtable "target-features"="+avx512bw,+avx512cd,+avx512dq,+avx512f,+avx512vl" }
415
416!0 = !{}
417