; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt %s -passes=loop-vectorize -force-vector-width=4 -force-vector-interleave=1 -S | FileCheck %s

; Each function below pins the runtime memory-overlap check that the loop
; vectorizer emits in vector.memcheck for one pair of pointer accesses.

target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"

; Single-block loop: an i32 is loaded from %a[%iv], multiplied by 3 and stored
; to %b[%iv]. Both accesses are indexed by the same induction variable and
; have the same element type, so they share step and access size.
; The assertions expect an overlap check between the ranges [%b, %b + %n) and
; [%a, %a + %n) (in i32 elements), branching to scalar.ph on a conflict.
define void @same_step_and_size(i32* %a, i32* %b, i64 %n) {
; CHECK-LABEL: @same_step_and_size(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT:    [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; CHECK-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 [[N]]
; CHECK-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP45]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[A3]], [[SCEVGEP2]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label %scalar.ph, label %vector.ph
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.a = getelementptr inbounds i32, i32* %a, i64 %iv
  %l = load i32, i32* %gep.a
  %mul = mul nsw i32 %l, 3
  %gep.b = getelementptr inbounds i32, i32* %b, i64 %iv
  store i32 %mul, i32* %gep.b
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Two i32 stores with the same step and size, placed in different arms of a
; conditional: %a[%iv] is written in %then and %b[%iv] in %else, selected by
; %iv != %x. Neither store's block dominates the other's.
; The assertions expect the same overlap check between the %b and %a ranges
; as for a straight-line loop.
define void @same_step_and_size_no_dominance_between_accesses(i32* %a, i32* %b, i64 %n, i64 %x) {
; CHECK-LABEL: @same_step_and_size_no_dominance_between_accesses(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT:    [[A3:%.*]] = bitcast i32* [[A:%.*]] to i8*
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; CHECK-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i32, i32* [[A]], i64 [[N]]
; CHECK-NEXT:    [[SCEVGEP45:%.*]] = bitcast i32* [[SCEVGEP4]] to i8*
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP45]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[A3]], [[SCEVGEP2]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label %scalar.ph, label %vector.ph
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %cmp = icmp ne i64 %iv, %x
  br i1 %cmp, label %then, label %else

then:
  %gep.a = getelementptr inbounds i32, i32* %a, i64 %iv
  store i32 0, i32* %gep.a
  br label %loop.latch

else:
  %gep.b = getelementptr inbounds i32, i32* %b, i64 %iv
  store i32 10, i32* %gep.b
  br label %loop.latch

loop.latch:
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; An i16 is loaded from %a[%iv], sign-extended, multiplied by 3 and stored as
; an i32 to %b[%iv]. The two pointers advance by different element types
; (i16 vs. i32) and the accesses have different sizes.
; The assertions expect an overlap check between [%b, %b + %n) in i32
; elements and [%a, %a + %n) in i16 elements.
define void @different_steps_and_different_access_sizes(i16* %a, i32* %b, i64 %n) {
; CHECK-LABEL: @different_steps_and_different_access_sizes(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT:    [[A3:%.*]] = bitcast i16* [[A:%.*]] to i8*
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; CHECK-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; CHECK-NEXT:    [[SCEVGEP4:%.*]] = getelementptr i16, i16* [[A]], i64 [[N]]
; CHECK-NEXT:    [[SCEVGEP45:%.*]] = bitcast i16* [[SCEVGEP4]] to i8*
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP45]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[A3]], [[SCEVGEP2]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label %scalar.ph, label %vector.ph
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.a = getelementptr inbounds i16, i16* %a, i64 %iv
  %l = load i16, i16* %gep.a
  %l.ext = sext i16 %l to i32
  %mul = mul nsw i32 %l.ext, 3
  %gep.b = getelementptr inbounds i32, i32* %b, i64 %iv
  store i32 %mul, i32* %gep.b
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; %a is indexed as [2 x i16] per iteration, reading only element 1 (an i16),
; while %b is indexed and written as i32. Both pointers therefore advance by
; one 2 x i16 / i32 element per iteration, but the access sizes differ
; (i16 load vs. i32 store).
; The assertions expect the %a range to start at element 1 of the first
; [2 x i16] and to end at element 0 of index %n.
define void @steps_match_but_different_access_sizes_1([2 x i16]* %a, i32* %b, i64 %n) {
; CHECK-LABEL: @steps_match_but_different_access_sizes_1(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B1:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; CHECK-NEXT:    [[SCEVGEP2:%.*]] = bitcast i32* [[SCEVGEP]] to i8*
; CHECK-NEXT:    [[SCEVGEP3:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 1
; CHECK-NEXT:    [[SCEVGEP34:%.*]] = bitcast i16* [[SCEVGEP3]] to i8*
; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 [[N]], i64 0
; CHECK-NEXT:    [[SCEVGEP56:%.*]] = bitcast i16* [[SCEVGEP5]] to i8*
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[B1]], [[SCEVGEP56]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[SCEVGEP34]], [[SCEVGEP2]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label %scalar.ph, label %vector.ph
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.a = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 %iv, i64 1
  %l = load i16, i16* %gep.a
  %l.ext = sext i16 %l to i32
  %mul = mul nsw i32 %l.ext, 3
  %gep.b = getelementptr inbounds i32, i32* %b, i64 %iv
  store i32 %mul, i32* %gep.b
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}

; Same as @steps_match_but_different_access_sizes_1, but with source and sink
; accesses flipped: the i32 is loaded from %b[%iv], multiplied by 3, truncated
; to i16 and stored to element 1 of %a[%iv].
define void @steps_match_but_different_access_sizes_2([2 x i16]* %a, i32* %b, i64 %n) {
; CHECK-LABEL: @steps_match_but_different_access_sizes_2(
; CHECK-NEXT:  entry:
; CHECK-NEXT:    [[B4:%.*]] = bitcast i32* [[B:%.*]] to i8*
; CHECK-NEXT:    [[MIN_ITERS_CHECK:%.*]] = icmp ult i64 [[N:%.*]], 4
; CHECK-NEXT:    br i1 [[MIN_ITERS_CHECK]], label %scalar.ph, label %vector.memcheck
; CHECK:       vector.memcheck:
; CHECK-NEXT:    [[SCEVGEP:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A:%.*]], i64 0, i64 1
; CHECK-NEXT:    [[SCEVGEP1:%.*]] = bitcast i16* [[SCEVGEP]] to i8*
; CHECK-NEXT:    [[SCEVGEP2:%.*]] = getelementptr [2 x i16], [2 x i16]* [[A]], i64 [[N]], i64 0
; CHECK-NEXT:    [[SCEVGEP23:%.*]] = bitcast i16* [[SCEVGEP2]] to i8*
; CHECK-NEXT:    [[SCEVGEP5:%.*]] = getelementptr i32, i32* [[B]], i64 [[N]]
; CHECK-NEXT:    [[SCEVGEP56:%.*]] = bitcast i32* [[SCEVGEP5]] to i8*
; CHECK-NEXT:    [[BOUND0:%.*]] = icmp ult i8* [[SCEVGEP1]], [[SCEVGEP56]]
; CHECK-NEXT:    [[BOUND1:%.*]] = icmp ult i8* [[B4]], [[SCEVGEP23]]
; CHECK-NEXT:    [[FOUND_CONFLICT:%.*]] = and i1 [[BOUND0]], [[BOUND1]]
; CHECK-NEXT:    br i1 [[FOUND_CONFLICT]], label %scalar.ph, label %vector.ph
;
entry:
  br label %loop

loop:
  %iv = phi i64 [ 0, %entry ], [ %iv.next, %loop ]
  %gep.b = getelementptr inbounds i32, i32* %b, i64 %iv
  %l = load i32, i32* %gep.b
  %mul = mul nsw i32 %l, 3
  %gep.a = getelementptr inbounds [2 x i16], [2 x i16]* %a, i64 %iv, i64 1
  %trunc = trunc i32 %mul to i16
  store i16 %trunc, i16* %gep.a
  %iv.next = add nuw nsw i64 %iv, 1
  %exitcond = icmp eq i64 %iv.next, %n
  br i1 %exitcond, label %exit, label %loop

exit:
  ret void
}
