1; REQUIRES: asserts
2; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine -enable-new-pm=0 2>&1 | FileCheck %s
3; RUN: opt < %s -loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -instcombine -debug-only=loop-vectorize -disable-output -print-after=instcombine -enable-new-pm=0 2>&1 | FileCheck %s --check-prefix=INTER
4; RUN: opt < %s -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s
5; RUN: opt < %s -passes=loop-vectorize,instcombine -force-vector-width=4 -force-vector-interleave=1 -enable-interleaved-mem-accesses -debug-only=loop-vectorize -disable-output -print-after=instcombine 2>&1 | FileCheck %s --check-prefix=INTER
6
7target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"
8
9%pair = type { i32, i32 }
10
11; CHECK-LABEL: consecutive_ptr_forward
12;
13; Check that a forward consecutive pointer is recognized as uniform and remains
14; uniform after vectorization.
15;
16; CHECK:     LV: Found uniform instruction: %tmp1 = getelementptr inbounds i32, i32* %a, i64 %i
17; CHECK:     vector.body
18; CHECK:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
19; CHECK-NOT:   getelementptr
20; CHECK:       getelementptr inbounds i32, i32* %a, i64 %index
21; CHECK-NOT:   getelementptr
22; CHECK:       br i1 {{.*}}, label %middle.block, label %vector.body
23;
; Sum reduction over a[0..n-1] with a forward (+1) induction variable; %tmp1 is
; the consecutive forward pointer the CHECK lines above expect to stay uniform.
define i32 @consecutive_ptr_forward(i32* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]    ; induction: 0, 1, 2, ...
  %tmp0 = phi i32 [ %tmp3, %for.body ], [ 0, %entry ]   ; running sum (reduction phi)
  %tmp1 = getelementptr inbounds i32, i32* %a, i64 %i   ; &a[i] — consecutive, stride +1
  %tmp2 = load i32, i32* %tmp1, align 8
  %tmp3 = add i32 %tmp0, %tmp2
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp4 = phi i32 [ %tmp3, %for.body ]                  ; final sum leaves the loop via LCSSA phi
  ret i32 %tmp4
}
42
43; CHECK-LABEL: consecutive_ptr_reverse
44;
45; Check that a reverse consecutive pointer is recognized as uniform and remains
46; uniform after vectorization.
47;
48; CHECK:     LV: Found uniform instruction: %tmp1 = getelementptr inbounds i32, i32* %a, i64 %i
49; CHECK:     vector.body
50; CHECK:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
51; CHECK:       %offset.idx = sub i64 %n, %index
52; CHECK-NOT:   getelementptr
53; CHECK:       %[[G0:.+]] = getelementptr i32, i32* %a, i64 -3
54; CHECK:       getelementptr i32, i32* %[[G0]], i64 %offset.idx
55; CHECK-NOT:   getelementptr
56; CHECK:       br i1 {{.*}}, label %middle.block, label %vector.body
57;
; Sum reduction walking a[] backwards from index %n with a -1 induction step;
; %tmp1 is the reverse consecutive pointer the CHECK lines above cover.
define i32 @consecutive_ptr_reverse(i32* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ %n, %entry ]   ; induction: n, n-1, n-2, ...
  %tmp0 = phi i32 [ %tmp3, %for.body ], [ 0, %entry ]   ; running sum (reduction phi)
  %tmp1 = getelementptr inbounds i32, i32* %a, i64 %i   ; &a[i] — consecutive, stride -1
  %tmp2 = load i32, i32* %tmp1, align 8
  %tmp3 = add i32 %tmp0, %tmp2
  %i.next = add nsw i64 %i, -1
  %cond = icmp sgt i64 %i.next, 0
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp4 = phi i32 [ %tmp3, %for.body ]                  ; final sum leaves the loop via LCSSA phi
  ret i32 %tmp4
}
76
77; CHECK-LABEL: interleaved_access_forward
78; INTER-LABEL: interleaved_access_forward
79;
80; Check that a consecutive-like pointer used by a forward interleaved group is
81; recognized as uniform and remains uniform after vectorization. When
82; interleaved memory accesses aren't enabled, the pointer should not be
83; recognized as uniform, and it should not be uniform after vectorization.
84;
85; CHECK-NOT: LV: Found uniform instruction: %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
86; CHECK-NOT: LV: Found uniform instruction: %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
87; CHECK:     vector.body
88; CHECK:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
89; CHECK:       %[[I1:.+]] = or i64 %index, 1
90; CHECK:       %[[I2:.+]] = or i64 %index, 2
91; CHECK:       %[[I3:.+]] = or i64 %index, 3
92; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %index, i32 0
93; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 0
94; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 0
95; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 0
96; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %index, i32 1
97; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 1
98; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 1
99; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 1
100; CHECK:       br i1 {{.*}}, label %middle.block, label %vector.body
101;
102; INTER:     LV: Found uniform instruction: %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
103; INTER:     LV: Found uniform instruction: %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
104; INTER:     vector.body
105; INTER:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
106; INTER-NOT:   getelementptr
107; INTER:       getelementptr inbounds %pair, %pair* %p, i64 %index, i32 0
108; INTER-NOT:   getelementptr
109; INTER:       br i1 {{.*}}, label %middle.block, label %vector.body
110;
; Sums both i32 fields of each %pair, forming a forward interleaved group of
; two loads; %tmp1/%tmp2 are the group pointers the CHECK/INTER lines test.
define i32 @interleaved_access_forward(%pair* %p, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]    ; forward induction
  %tmp0 = phi i32 [ %tmp6, %for.body ], [ 0, %entry ]   ; running sum (reduction phi)
  %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0  ; &p[i].first
  %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1  ; &p[i].second
  %tmp3 = load i32, i32* %tmp1, align 8
  %tmp4 = load i32, i32* %tmp2, align 8
  %tmp5 = add i32 %tmp3, %tmp4                          ; first + second of this pair
  %tmp6 = add i32 %tmp0, %tmp5                          ; accumulate into the reduction
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp14 = phi i32 [ %tmp6, %for.body ]                 ; final sum via LCSSA phi
  ret i32 %tmp14
}
132
133; CHECK-LABEL: interleaved_access_reverse
134; INTER-LABEL: interleaved_access_reverse
135;
136; Check that a consecutive-like pointer used by a reverse interleaved group is
137; recognized as uniform and remains uniform after vectorization. When
138; interleaved memory accesses aren't enabled, the pointer should not be
139; recognized as uniform, and it should not be uniform after vectorization.
;
142; CHECK-NOT: LV: Found uniform instruction: %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
143; CHECK-NOT: LV: Found uniform instruction: %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
144; CHECK:     vector.body
145; CHECK:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
146; CHECK:       %offset.idx = sub i64 %n, %index
147; CHECK:       %[[I1:.+]] = add i64 %offset.idx, -1
148; CHECK:       %[[I2:.+]] = add i64 %offset.idx, -2
149; CHECK:       %[[I3:.+]] = add i64 %offset.idx, -3
150; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %offset.idx, i32 0
151; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 0
152; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 0
153; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 0
154; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %offset.idx, i32 1
155; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 1
156; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 1
157; CHECK:       getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 1
158; CHECK:       br i1 {{.*}}, label %middle.block, label %vector.body
159;
160; INTER:     LV: Found uniform instruction: %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
161; INTER:     LV: Found uniform instruction: %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1
162; INTER:     vector.body
163; INTER:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
164; INTER:       %offset.idx = sub i64 %n, %index
165; INTER-NOT:   getelementptr
166; INTER:       %[[G0:.+]] = getelementptr inbounds %pair, %pair* %p, i64 %offset.idx, i32 0
167; INTER:       getelementptr inbounds i32, i32* %[[G0]], i64 -6
168; INTER-NOT:   getelementptr
169; INTER:       br i1 {{.*}}, label %middle.block, label %vector.body
170;
; Same pair-field summation as interleaved_access_forward, but iterating in
; reverse from %n, forming a reverse interleaved group of two loads.
define i32 @interleaved_access_reverse(%pair* %p, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ %n, %entry ]   ; reverse induction: n, n-1, ...
  %tmp0 = phi i32 [ %tmp6, %for.body ], [ 0, %entry ]   ; running sum (reduction phi)
  %tmp1 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0  ; &p[i].first
  %tmp2 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 1  ; &p[i].second
  %tmp3 = load i32, i32* %tmp1, align 8
  %tmp4 = load i32, i32* %tmp2, align 8
  %tmp5 = add i32 %tmp3, %tmp4                          ; first + second of this pair
  %tmp6 = add i32 %tmp0, %tmp5                          ; accumulate into the reduction
  %i.next = add nsw i64 %i, -1
  %cond = icmp sgt i64 %i.next, 0
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp14 = phi i32 [ %tmp6, %for.body ]                 ; final sum via LCSSA phi
  ret i32 %tmp14
}
192
193; INTER-LABEL: predicated_store
194;
195; Check that a consecutive-like pointer used by a forward interleaved group and
196; scalarized store is not recognized as uniform and is not uniform after
197; vectorization. The store is scalarized because it's in a predicated block.
198; Even though the load in this example is vectorized and only uses the pointer
199; as if it were uniform, the store is scalarized, making the pointer
200; non-uniform.
201;
202; INTER-NOT: LV: Found uniform instruction: %tmp0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0
203; INTER:     vector.body
204; INTER:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, {{.*}} ]
205; INTER:       %[[G0:.+]] = getelementptr inbounds %pair, %pair* %p, i64 %index, i32 0
206; INTER:       %[[B0:.+]] = bitcast i32* %[[G0]] to <8 x i32>*
207; INTER:       %wide.vec = load <8 x i32>, <8 x i32>* %[[B0]], align 8
208; INTER:       %[[I1:.+]] = or i64 %index, 1
209; INTER:       getelementptr inbounds %pair, %pair* %p, i64 %[[I1]], i32 0
210; INTER:       %[[I2:.+]] = or i64 %index, 2
211; INTER:       getelementptr inbounds %pair, %pair* %p, i64 %[[I2]], i32 0
212; INTER:       %[[I3:.+]] = or i64 %index, 3
213; INTER:       getelementptr inbounds %pair, %pair* %p, i64 %[[I3]], i32 0
214; INTER:       br i1 {{.*}}, label %middle.block, label %vector.body
215;
; Loads p[i].first and, only when it equals %x, stores it back through the
; same pointer %tmp0. The conditional store sits in a predicated block, which
; (per the INTER checks above) keeps %tmp0 from being treated as uniform.
define void @predicated_store(%pair *%p, i32 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i  = phi i64 [ %i.next, %if.merge ], [ 0, %entry ]   ; forward induction
  %tmp0 = getelementptr inbounds %pair, %pair* %p, i64 %i, i32 0  ; &p[i].first — shared by load and store
  %tmp1 = load i32, i32* %tmp0, align 8
  %tmp2 = icmp eq i32 %tmp1, %x
  br i1 %tmp2, label %if.then, label %if.merge

if.then:
  ; Predicated store: will be scalarized during vectorization.
  store i32 %tmp1, i32* %tmp0, align 8
  br label %if.merge

if.merge:
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
239
240; CHECK-LABEL: irregular_type
241;
242; Check that a consecutive pointer used by a scalarized store is not recognized
243; as uniform and is not uniform after vectorization. The store is scalarized
; because the stored type may require padding.
245;
246; CHECK-NOT: LV: Found uniform instruction: %tmp1 = getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %i
247; CHECK:     vector.body
248; CHECK:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
249; CHECK:       %[[I1:.+]] = or i64 %index, 1
250; CHECK:       %[[I2:.+]] = or i64 %index, 2
251; CHECK:       %[[I3:.+]] = or i64 %index, 3
252; CHECK:       getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %index
253; CHECK:       getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %[[I1]]
254; CHECK:       getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %[[I2]]
255; CHECK:       getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %[[I3]]
256; CHECK:       br i1 {{.*}}, label %middle.block, label %vector.body
257;
; Stores the x86_fp80 value 1.0 to every a[i]. x86_fp80 is an irregular type,
; so the store is scalarized and %tmp1 is not uniform (see CHECK lines above).
define void @irregular_type(x86_fp80* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]    ; forward induction
  %tmp0 = sitofp i32 1 to x86_fp80                      ; constant 1.0 as x86_fp80
  %tmp1 = getelementptr inbounds x86_fp80, x86_fp80* %a, i64 %i  ; &a[i]
  store x86_fp80 %tmp0, x86_fp80* %tmp1, align 16
  %i.next = add i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
274
275; CHECK-LABEL: pointer_iv_uniform
276;
277; Check that a pointer induction variable is recognized as uniform and remains
278; uniform after vectorization.
279;
280; CHECK:     LV: Found uniform instruction: %p = phi i32* [ %tmp03, %for.body ], [ %a, %entry ]
281; CHECK:     vector.body
282; CHECK:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
283; CHECK-NOT:   getelementptr
284; CHECK:       %next.gep = getelementptr i32, i32* %a, i64 %index
285; CHECK-NOT:   getelementptr
286; CHECK:       br i1 {{.*}}, label %middle.block, label %vector.body
287;
; Stores %x through a pointer induction variable %p that advances by one i32
; per iteration; %p's only use is the (uniform) store address, so the CHECK
; lines above expect it to be recognized as uniform.
define void @pointer_iv_uniform(i32* %a, i32 %x, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]    ; integer induction (trip count)
  %p = phi i32* [ %tmp03, %for.body ], [ %a, %entry ]   ; pointer IV: a, a+1, a+2, ...
  store i32 %x, i32* %p, align 8
  %tmp03 = getelementptr inbounds i32, i32* %p, i32 1   ; advance pointer by one i32
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
304
305; INTER-LABEL: pointer_iv_non_uniform_0
306;
307; Check that a pointer induction variable with a non-uniform user is not
308; recognized as uniform and is not uniform after vectorization. The pointer
309; induction variable is used by getelementptr instructions that are non-uniform
310; due to scalarization of the stores.
311;
312; INTER-NOT: LV: Found uniform instruction: %p = phi i32* [ %tmp03, %for.body ], [ %a, %entry ]
313; INTER:     vector.body
314; INTER:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
315; INTER:       %[[I0:.+]] = shl i64 %index, 2
316; INTER:       %next.gep = getelementptr i32, i32* %a, i64 %[[I0]]
317; INTER:       %[[S1:.+]] = shl i64 %index, 2
318; INTER:       %[[I1:.+]] = or i64 %[[S1]], 4
319; INTER:       %next.gep2 = getelementptr i32, i32* %a, i64 %[[I1]]
320; INTER:       %[[S2:.+]] = shl i64 %index, 2
321; INTER:       %[[I2:.+]] = or i64 %[[S2]], 8
322; INTER:       %next.gep3 = getelementptr i32, i32* %a, i64 %[[I2]]
323; INTER:       %[[S3:.+]] = shl i64 %index, 2
324; INTER:       %[[I3:.+]] = or i64 %[[S3]], 12
325; INTER:       %next.gep4 = getelementptr i32, i32* %a, i64 %[[I3]]
326; INTER:       br i1 {{.*}}, label %middle.block, label %vector.body
327;
; Pointer IV %p advances by 4 i32s per iteration; loads at offsets 0/1/4/5 and
; stores at offsets 2/3 hang off it. The scalarized stores make the GEPs — and
; hence %p — non-uniform (see INTER checks above).
define void @pointer_iv_non_uniform_0(i32* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]    ; integer induction (trip count)
  %p = phi i32* [ %tmp03, %for.body ], [ %a, %entry ]   ; pointer IV, stride 4 x i32
  %tmp00 = load i32, i32* %p, align 8                   ; p[0]
  %tmp01 = getelementptr inbounds i32, i32* %p, i32 1
  %tmp02 = load i32, i32* %tmp01, align 8               ; p[1]
  %tmp03 = getelementptr inbounds i32, i32* %p, i32 4   ; also the next-iteration pointer
  %tmp04 = load i32, i32* %tmp03, align 8               ; p[4]
  %tmp05 = getelementptr inbounds i32, i32* %p, i32 5
  %tmp06 = load i32, i32* %tmp05, align 8               ; p[5]
  %tmp07 = sub i32 %tmp04, %tmp00                       ; p[4] - p[0]
  ; NOTE(review): %tmp02 - %tmp02 is always zero (and %tmp06 is unused) —
  ; possibly intended %tmp06 - %tmp02; confirm against the upstream test.
  %tmp08 = sub i32 %tmp02, %tmp02
  %tmp09 = getelementptr inbounds i32, i32* %p, i32 2
  store i32 %tmp07, i32* %tmp09, align 8                ; p[2] = tmp07 (scalarized store)
  %tmp10 = getelementptr inbounds i32, i32* %p, i32 3
  store i32 %tmp08, i32* %tmp10, align 8                ; p[3] = tmp08 (scalarized store)
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
355
356; CHECK-LABEL: pointer_iv_non_uniform_1
357;
358; Check that a pointer induction variable with a non-uniform user is not
359; recognized as uniform and is not uniform after vectorization. The pointer
360; induction variable is used by a store that will be scalarized.
361;
362; CHECK-NOT: LV: Found uniform instruction: %p = phi x86_fp80* [%tmp1, %for.body], [%a, %entry]
363; CHECK:     vector.body
364; CHECK:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
365; CHECK:       %next.gep = getelementptr x86_fp80, x86_fp80* %a, i64 %index
366; CHECK:       %[[I1:.+]] = or i64 %index, 1
367; CHECK:       %next.gep2 = getelementptr x86_fp80, x86_fp80* %a, i64 %[[I1]]
368; CHECK:       %[[I2:.+]] = or i64 %index, 2
369; CHECK:       %next.gep3 = getelementptr x86_fp80, x86_fp80* %a, i64 %[[I2]]
370; CHECK:       %[[I3:.+]] = or i64 %index, 3
371; CHECK:       %next.gep4 = getelementptr x86_fp80, x86_fp80* %a, i64 %[[I3]]
372; CHECK:       br i1 {{.*}}, label %middle.block, label %vector.body
373;
; Pointer IV %p is used directly by a store of x86_fp80, which is scalarized
; (irregular type), so %p must not be recognized as uniform (CHECK lines above).
define void @pointer_iv_non_uniform_1(x86_fp80* %a, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]    ; integer induction (trip count)
  %p = phi x86_fp80* [%tmp1, %for.body], [%a, %entry]   ; pointer IV, one x86_fp80 per step
  %tmp0 = sitofp i32 1 to x86_fp80                      ; constant 1.0 as x86_fp80
  store x86_fp80 %tmp0, x86_fp80* %p, align 16          ; scalarized store through the IV
  %tmp1 = getelementptr inbounds x86_fp80, x86_fp80* %p, i32 1  ; advance pointer
  %i.next = add i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
391
392; CHECK-LABEL: pointer_iv_mixed
393;
394; Check multiple pointer induction variables where only one is recognized as
395; uniform and remains uniform after vectorization. The other pointer induction
396; variable is not recognized as uniform and is not uniform after vectorization
397; because it is stored to memory.
398;
399; CHECK-NOT: LV: Found uniform instruction: %p = phi i32* [ %tmp3, %for.body ], [ %a, %entry ]
400; CHECK:     LV: Found uniform instruction: %q = phi i32** [ %tmp4, %for.body ], [ %b, %entry ]
401; CHECK:     vector.body
402; CHECK:       %pointer.phi = phi i32* [ %a, %vector.ph ], [ %ptr.ind, %vector.body ]
403; CHECK:       %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
404; CHECK:       %[[PTRVEC:.+]] = getelementptr i32, i32* %pointer.phi, <4 x i64> <i64 0, i64 1, i64 2, i64 3>
405; CHECK:       %next.gep = getelementptr i32*, i32** %b, i64 %index
406; CHECK:       %[[NEXTGEPBC:.+]] = bitcast i32** %next.gep to <4 x i32*>*
407; CHECK:       store <4 x i32*> %[[PTRVEC]], <4 x i32*>* %[[NEXTGEPBC]], align 8
408; CHECK:       %ptr.ind = getelementptr i32, i32* %pointer.phi, i64 4
409; CHECK:       br i1 {{.*}}, label %middle.block, label %vector.body
410;
; Two pointer IVs: %p is loaded through AND stored as a value into *%q, so it
; must stay non-uniform (a vector of pointers is needed); %q is only used as a
; uniform store address, so it remains uniform (see CHECK lines above).
define i32 @pointer_iv_mixed(i32* %a, i32** %b, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ %i.next, %for.body ], [ 0, %entry ]    ; integer induction (trip count)
  %p = phi i32* [ %tmp3, %for.body ], [ %a, %entry ]    ; pointer IV over a[]; its VALUE is stored
  %q = phi i32** [ %tmp4, %for.body ], [ %b, %entry ]   ; pointer IV over b[]; address-only use
  %tmp0 = phi i32 [ %tmp2, %for.body ], [ 0, %entry ]   ; running sum (reduction phi)
  %tmp1 = load i32, i32* %p, align 8
  %tmp2 = add i32 %tmp1, %tmp0
  store i32* %p, i32** %q, align 8                      ; b[i] = &a[i] — stores %p as data
  %tmp3 = getelementptr inbounds i32, i32* %p, i32 1    ; advance %p
  %tmp4 = getelementptr inbounds i32*, i32** %q, i32 1  ; advance %q
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  %tmp5 = phi i32 [ %tmp2, %for.body ]                  ; final sum via LCSSA phi
  ret i32 %tmp5
}
433
434; INTER-LABEL: bitcast_pointer_operand
435;
436; Check that a pointer operand having a user other than a memory access is
437; recognized as uniform after vectorization. In this test case, %tmp1 is a
438; bitcast that is used by a load and a getelementptr instruction (%tmp2). Once
439; %tmp2 is marked uniform, %tmp1 should be marked uniform as well.
440;
441; INTER:       LV: Found uniform instruction: %cond = icmp slt i64 %i.next, %n
442; INTER-NEXT:  LV: Found uniform instruction: %tmp2 = getelementptr inbounds i8, i8* %tmp1, i64 3
443; INTER-NEXT:  LV: Found uniform instruction: %tmp6 = getelementptr inbounds i8, i8* %B, i64 %i
444; INTER-NEXT:  LV: Found uniform instruction: %tmp1 = bitcast i64* %tmp0 to i8*
445; INTER-NEXT:  LV: Found uniform instruction: %tmp0 = getelementptr inbounds i64, i64* %A, i64 %i
446; INTER-NEXT:  LV: Found uniform instruction: %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]
447; INTER-NEXT:  LV: Found uniform instruction: %i.next = add nuw nsw i64 %i, 1
448; INTER:       define void @bitcast_pointer_operand(
449; INTER:       vector.body:
450; INTER-NEXT:    [[INDEX:%.*]] = phi i64 [ 0, %vector.ph ], [ [[INDEX_NEXT:%.*]], %vector.body ]
451; INTER-NEXT:    [[TMP4:%.*]] = getelementptr inbounds i64, i64* %A, i64 [[INDEX]]
452; INTER-NEXT:    [[TMP5:%.*]] = bitcast i64* [[TMP4]] to <32 x i8>*
453; INTER-NEXT:    [[WIDE_VEC:%.*]] = load <32 x i8>, <32 x i8>* [[TMP5]], align 1
454; INTER-NEXT:    [[STRIDED_VEC:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 0, i32 8, i32 16, i32 24>
455; INTER-NEXT:    [[STRIDED_VEC5:%.*]] = shufflevector <32 x i8> [[WIDE_VEC]], <32 x i8> poison, <4 x i32> <i32 3, i32 11, i32 19, i32 27>
456; INTER-NEXT:    [[TMP6:%.*]] = xor <4 x i8> [[STRIDED_VEC5]], [[STRIDED_VEC]]
457; INTER-NEXT:    [[TMP7:%.*]] = getelementptr inbounds i8, i8* %B, i64 [[INDEX]]
458; INTER-NEXT:    [[TMP8:%.*]] = bitcast i8* [[TMP7]] to <4 x i8>*
459; INTER-NEXT:    store <4 x i8> [[TMP6]], <4 x i8>* [[TMP8]], align 1
460; INTER-NEXT:    [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 4
461; INTER:         br i1 {{.*}}, label %middle.block, label %vector.body
462;
; B[i] = byte 3 XOR byte 0 of A[i]. The bitcast %tmp1 feeds both a load and a
; GEP (%tmp2); once %tmp2 is uniform, %tmp1 must be marked uniform too, which
; is what the INTER checks above verify.
define void @bitcast_pointer_operand(i64* %A, i8* %B, i64 %n) {
entry:
  br label %for.body

for.body:
  %i = phi i64 [ 0, %entry ], [ %i.next, %for.body ]    ; forward induction
  %tmp0 = getelementptr inbounds i64, i64* %A, i64 %i   ; &A[i]
  %tmp1 = bitcast i64* %tmp0 to i8*                     ; view A[i] as bytes
  %tmp2 = getelementptr inbounds i8, i8* %tmp1, i64 3   ; byte 3 of A[i]
  %tmp3 = load i8, i8* %tmp2, align 1
  %tmp4 = load i8, i8* %tmp1, align 1                   ; byte 0 of A[i]
  %tmp5 = xor i8 %tmp3, %tmp4
  %tmp6 = getelementptr inbounds i8, i8* %B, i64 %i     ; &B[i]
  store i8 %tmp5, i8* %tmp6
  %i.next = add nuw nsw i64 %i, 1
  %cond = icmp slt i64 %i.next, %n
  br i1 %cond, label %for.body, label %for.end

for.end:
  ret void
}
484