1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -slp-vectorizer -slp-threshold=-10000 < %s | FileCheck %s
3; RUN: opt -S -slp-vectorizer -slp-threshold=0 < %s | FileCheck %s
4
; Target description shared by every function in this file: an explicit data
; layout string and an x86_64 macOS triple.
5target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128"
6target triple = "x86_64-apple-macosx10.8.0"
7
; Baseline case: every lane of %a, %b and %c is extracted, each %c lane is
; compared against zero, the per-lane result selects between the %a and %b
; lane, and the four results are inserted back at their original indices.
; The expected output is one <4 x i32> icmp and one <4 x float> select.
8define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
9; CHECK-LABEL: @simple_select(
10; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
11; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
12; CHECK-NEXT:    ret <4 x float> [[TMP2]]
13;
14  %c0 = extractelement <4 x i32> %c, i32 0
15  %c1 = extractelement <4 x i32> %c, i32 1
16  %c2 = extractelement <4 x i32> %c, i32 2
17  %c3 = extractelement <4 x i32> %c, i32 3
18  %a0 = extractelement <4 x float> %a, i32 0
19  %a1 = extractelement <4 x float> %a, i32 1
20  %a2 = extractelement <4 x float> %a, i32 2
21  %a3 = extractelement <4 x float> %a, i32 3
22  %b0 = extractelement <4 x float> %b, i32 0
23  %b1 = extractelement <4 x float> %b, i32 1
24  %b2 = extractelement <4 x float> %b, i32 2
25  %b3 = extractelement <4 x float> %b, i32 3
26  %cmp0 = icmp ne i32 %c0, 0
27  %cmp1 = icmp ne i32 %c1, 0
28  %cmp2 = icmp ne i32 %c2, 0
29  %cmp3 = icmp ne i32 %c3, 0
30  %s0 = select i1 %cmp0, float %a0, float %b0
31  %s1 = select i1 %cmp1, float %a1, float %b1
32  %s2 = select i1 %cmp2, float %a2, float %b2
33  %s3 = select i1 %cmp3, float %a3, float %b3
34  %ra = insertelement <4 x float> undef, float %s0, i32 0
35  %rb = insertelement <4 x float> %ra, float %s1, i32 1
36  %rc = insertelement <4 x float> %rb, float %s2, i32 2
37  %rd = insertelement <4 x float> %rc, float %s3, i32 3
38  ret <4 x float> %rd
39}
40
; Same four-lane compare/select as @simple_select, but the results are
; inserted into lanes 0, 2, 4 and 7 of an <8 x float>. The expected output
; still does the compare and select as <4 x> operations, then uses two
; shufflevectors to widen the result and place it in those lanes.
41define <8 x float> @simple_select2(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
42; CHECK-LABEL: @simple_select2(
43; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
44; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
45; CHECK-NEXT:    [[TMP3:%.*]] = shufflevector <4 x float> [[TMP2]], <4 x float> undef, <8 x i32> <i32 0, i32 undef, i32 1, i32 undef, i32 2, i32 undef, i32 undef, i32 3>
46; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <8 x float> undef, <8 x float> [[TMP3]], <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 6, i32 15>
47; CHECK-NEXT:    ret <8 x float> [[RD1]]
48;
49  %c0 = extractelement <4 x i32> %c, i32 0
50  %c1 = extractelement <4 x i32> %c, i32 1
51  %c2 = extractelement <4 x i32> %c, i32 2
52  %c3 = extractelement <4 x i32> %c, i32 3
53  %a0 = extractelement <4 x float> %a, i32 0
54  %a1 = extractelement <4 x float> %a, i32 1
55  %a2 = extractelement <4 x float> %a, i32 2
56  %a3 = extractelement <4 x float> %a, i32 3
57  %b0 = extractelement <4 x float> %b, i32 0
58  %b1 = extractelement <4 x float> %b, i32 1
59  %b2 = extractelement <4 x float> %b, i32 2
60  %b3 = extractelement <4 x float> %b, i32 3
61  %cmp0 = icmp ne i32 %c0, 0
62  %cmp1 = icmp ne i32 %c1, 0
63  %cmp2 = icmp ne i32 %c2, 0
64  %cmp3 = icmp ne i32 %c3, 0
65  %s0 = select i1 %cmp0, float %a0, float %b0
66  %s1 = select i1 %cmp1, float %a1, float %b1
67  %s2 = select i1 %cmp2, float %a2, float %b2
68  %s3 = select i1 %cmp3, float %a3, float %b3
69  %ra = insertelement <8 x float> undef, float %s0, i32 0
70  %rb = insertelement <8 x float> %ra, float %s1, i32 2
71  %rc = insertelement <8 x float> %rb, float %s2, i32 4
72  %rd = insertelement <8 x float> %rc, float %s3, i32 7
73  ret <8 x float> %rd
74}
75
; Declaration of the llvm.assume intrinsic, called by @simple_select_eph.
76declare void @llvm.assume(i1) nounwind
77
78; This entire tree is ephemeral, so none of it should be vectorized.
; The rebuilt vector %rd is consumed only by the extract/fadd/fcmp chain that
; feeds the llvm.assume call, and the function itself returns undef. The
; expected output therefore repeats the scalar input instruction for
; instruction.
79define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
80; CHECK-LABEL: @simple_select_eph(
81; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
82; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
83; CHECK-NEXT:    [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
84; CHECK-NEXT:    [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
85; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
86; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
87; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
88; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
89; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
90; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
91; CHECK-NEXT:    [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
92; CHECK-NEXT:    [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
93; CHECK-NEXT:    [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
94; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
95; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
96; CHECK-NEXT:    [[CMP3:%.*]] = icmp ne i32 [[C3]], 0
97; CHECK-NEXT:    [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
98; CHECK-NEXT:    [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]]
99; CHECK-NEXT:    [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]]
100; CHECK-NEXT:    [[S3:%.*]] = select i1 [[CMP3]], float [[A3]], float [[B3]]
101; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0
102; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1
103; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[S2]], i32 2
104; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3
105; CHECK-NEXT:    [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0
106; CHECK-NEXT:    [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1
107; CHECK-NEXT:    [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2
108; CHECK-NEXT:    [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3
109; CHECK-NEXT:    [[Q4:%.*]] = fadd float [[Q0]], [[Q1]]
110; CHECK-NEXT:    [[Q5:%.*]] = fadd float [[Q2]], [[Q3]]
111; CHECK-NEXT:    [[Q6:%.*]] = fadd float [[Q4]], [[Q5]]
112; CHECK-NEXT:    [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]]
113; CHECK-NEXT:    call void @llvm.assume(i1 [[QI]])
114; CHECK-NEXT:    ret <4 x float> undef
115;
116  %c0 = extractelement <4 x i32> %c, i32 0
117  %c1 = extractelement <4 x i32> %c, i32 1
118  %c2 = extractelement <4 x i32> %c, i32 2
119  %c3 = extractelement <4 x i32> %c, i32 3
120  %a0 = extractelement <4 x float> %a, i32 0
121  %a1 = extractelement <4 x float> %a, i32 1
122  %a2 = extractelement <4 x float> %a, i32 2
123  %a3 = extractelement <4 x float> %a, i32 3
124  %b0 = extractelement <4 x float> %b, i32 0
125  %b1 = extractelement <4 x float> %b, i32 1
126  %b2 = extractelement <4 x float> %b, i32 2
127  %b3 = extractelement <4 x float> %b, i32 3
128  %cmp0 = icmp ne i32 %c0, 0
129  %cmp1 = icmp ne i32 %c1, 0
130  %cmp2 = icmp ne i32 %c2, 0
131  %cmp3 = icmp ne i32 %c3, 0
132  %s0 = select i1 %cmp0, float %a0, float %b0
133  %s1 = select i1 %cmp1, float %a1, float %b1
134  %s2 = select i1 %cmp2, float %a2, float %b2
135  %s3 = select i1 %cmp3, float %a3, float %b3
136  %ra = insertelement <4 x float> undef, float %s0, i32 0
137  %rb = insertelement <4 x float> %ra, float %s1, i32 1
138  %rc = insertelement <4 x float> %rb, float %s2, i32 2
139  %rd = insertelement <4 x float> %rc, float %s3, i32 3
140  %q0 = extractelement <4 x float> %rd, i32 0
141  %q1 = extractelement <4 x float> %rd, i32 1
142  %q2 = extractelement <4 x float> %rd, i32 2
143  %q3 = extractelement <4 x float> %rd, i32 3
144  %q4 = fadd float %q0, %q1
145  %q5 = fadd float %q2, %q3
146  %q6 = fadd float %q4, %q5
147  %qi = fcmp olt float %q6, %q5
  ; The assume call is the only consumer of everything computed above.
148  call void @llvm.assume(i1 %qi)
149  ret <4 x float> undef
150}
151
152; Insert in an order different from the vector indices to make sure it
153; doesn't matter.
; Here %s0 goes to lane 2, %s1 to lane 1, %s2 to lane 0 and %s3 to lane 3.
; The expected output shuffles %c, %a and %b with mask <2, 1, 0, 3> and then
; performs a single vector icmp and select.
154define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
155; CHECK-LABEL: @simple_select_insert_out_of_order(
156; CHECK-NEXT:    [[SHUFFLE:%.*]] = shufflevector <4 x i32> [[C:%.*]], <4 x i32> poison, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
157; CHECK-NEXT:    [[SHUFFLE1:%.*]] = shufflevector <4 x float> [[A:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
158; CHECK-NEXT:    [[SHUFFLE2:%.*]] = shufflevector <4 x float> [[B:%.*]], <4 x float> poison, <4 x i32> <i32 2, i32 1, i32 0, i32 3>
159; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[SHUFFLE]], zeroinitializer
160; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[SHUFFLE1]], <4 x float> [[SHUFFLE2]]
161; CHECK-NEXT:    ret <4 x float> [[TMP2]]
162;
163  %c0 = extractelement <4 x i32> %c, i32 0
164  %c1 = extractelement <4 x i32> %c, i32 1
165  %c2 = extractelement <4 x i32> %c, i32 2
166  %c3 = extractelement <4 x i32> %c, i32 3
167  %a0 = extractelement <4 x float> %a, i32 0
168  %a1 = extractelement <4 x float> %a, i32 1
169  %a2 = extractelement <4 x float> %a, i32 2
170  %a3 = extractelement <4 x float> %a, i32 3
171  %b0 = extractelement <4 x float> %b, i32 0
172  %b1 = extractelement <4 x float> %b, i32 1
173  %b2 = extractelement <4 x float> %b, i32 2
174  %b3 = extractelement <4 x float> %b, i32 3
175  %cmp0 = icmp ne i32 %c0, 0
176  %cmp1 = icmp ne i32 %c1, 0
177  %cmp2 = icmp ne i32 %c2, 0
178  %cmp3 = icmp ne i32 %c3, 0
179  %s0 = select i1 %cmp0, float %a0, float %b0
180  %s1 = select i1 %cmp1, float %a1, float %b1
181  %s2 = select i1 %cmp2, float %a2, float %b2
182  %s3 = select i1 %cmp3, float %a3, float %b3
  ; Destination lanes are deliberately permuted: 2, 1, 0, 3.
183  %ra = insertelement <4 x float> undef, float %s0, i32 2
184  %rb = insertelement <4 x float> %ra, float %s1, i32 1
185  %rc = insertelement <4 x float> %rb, float %s2, i32 0
186  %rd = insertelement <4 x float> %rc, float %s3, i32 3
187  ret <4 x float> %rd
188}
189
; External functions declared as consumers of a vector and a scalar float.
; @v4f32_user is called by @simple_select_users; @f32_user is not referenced
; by any function visible in this file.
190declare void @v4f32_user(<4 x float>) #0
191declare void @f32_user(float) #0
192
193; Multiple users of the final constructed vector: %rd is both passed to
; @v4f32_user and returned. The expected output feeds the single vectorized
; select result to both uses.
194define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
195; CHECK-LABEL: @simple_select_users(
196; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> [[C:%.*]], zeroinitializer
197; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> [[A:%.*]], <4 x float> [[B:%.*]]
198; CHECK-NEXT:    call void @v4f32_user(<4 x float> [[TMP2]]) #[[ATTR0:[0-9]+]]
199; CHECK-NEXT:    ret <4 x float> [[TMP2]]
200;
201  %c0 = extractelement <4 x i32> %c, i32 0
202  %c1 = extractelement <4 x i32> %c, i32 1
203  %c2 = extractelement <4 x i32> %c, i32 2
204  %c3 = extractelement <4 x i32> %c, i32 3
205  %a0 = extractelement <4 x float> %a, i32 0
206  %a1 = extractelement <4 x float> %a, i32 1
207  %a2 = extractelement <4 x float> %a, i32 2
208  %a3 = extractelement <4 x float> %a, i32 3
209  %b0 = extractelement <4 x float> %b, i32 0
210  %b1 = extractelement <4 x float> %b, i32 1
211  %b2 = extractelement <4 x float> %b, i32 2
212  %b3 = extractelement <4 x float> %b, i32 3
213  %cmp0 = icmp ne i32 %c0, 0
214  %cmp1 = icmp ne i32 %c1, 0
215  %cmp2 = icmp ne i32 %c2, 0
216  %cmp3 = icmp ne i32 %c3, 0
217  %s0 = select i1 %cmp0, float %a0, float %b0
218  %s1 = select i1 %cmp1, float %a1, float %b1
219  %s2 = select i1 %cmp2, float %a2, float %b2
220  %s3 = select i1 %cmp3, float %a3, float %b3
221  %ra = insertelement <4 x float> undef, float %s0, i32 0
222  %rb = insertelement <4 x float> %ra, float %s1, i32 1
223  %rc = insertelement <4 x float> %rb, float %s2, i32 2
224  %rd = insertelement <4 x float> %rc, float %s3, i32 3
225  call void @v4f32_user(<4 x float> %rd) #0
226  ret <4 x float> %rd
227}
228
229; Unused insertelement: the %ra/%rb chain (lanes 0 and 1) has no users
; because %rc starts a fresh chain from undef instead of from %rb, so only
; lanes 2 and 3 reach the return value. The expected output handles the
; two lane pairs as separate <2 x> compare/select groups, each widened with
; shufflevector.
230define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
231; CHECK-LABEL: @simple_select_no_users(
232; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
233; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
234; CHECK-NEXT:    [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
235; CHECK-NEXT:    [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
236; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
237; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
238; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x float> [[A]], i32 2
239; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x float> [[A]], i32 3
240; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
241; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
242; CHECK-NEXT:    [[B2:%.*]] = extractelement <4 x float> [[B]], i32 2
243; CHECK-NEXT:    [[B3:%.*]] = extractelement <4 x float> [[B]], i32 3
244; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> poison, i32 [[C0]], i32 0
245; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
246; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
247; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x i32> poison, i32 [[C2]], i32 0
248; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x i32> [[TMP4]], i32 [[C3]], i32 1
249; CHECK-NEXT:    [[TMP6:%.*]] = icmp ne <2 x i32> [[TMP5]], zeroinitializer
250; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x float> poison, float [[A0]], i32 0
251; CHECK-NEXT:    [[TMP8:%.*]] = insertelement <2 x float> [[TMP7]], float [[A1]], i32 1
252; CHECK-NEXT:    [[TMP9:%.*]] = insertelement <2 x float> poison, float [[B0]], i32 0
253; CHECK-NEXT:    [[TMP10:%.*]] = insertelement <2 x float> [[TMP9]], float [[B1]], i32 1
254; CHECK-NEXT:    [[TMP11:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP8]], <2 x float> [[TMP10]]
255; CHECK-NEXT:    [[TMP12:%.*]] = insertelement <2 x float> poison, float [[A2]], i32 0
256; CHECK-NEXT:    [[TMP13:%.*]] = insertelement <2 x float> [[TMP12]], float [[A3]], i32 1
257; CHECK-NEXT:    [[TMP14:%.*]] = insertelement <2 x float> poison, float [[B2]], i32 0
258; CHECK-NEXT:    [[TMP15:%.*]] = insertelement <2 x float> [[TMP14]], float [[B3]], i32 1
259; CHECK-NEXT:    [[TMP16:%.*]] = select <2 x i1> [[TMP6]], <2 x float> [[TMP13]], <2 x float> [[TMP15]]
260; CHECK-NEXT:    [[TMP17:%.*]] = shufflevector <2 x float> [[TMP11]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
261; CHECK-NEXT:    [[RB2:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP17]], <4 x i32> <i32 4, i32 5, i32 2, i32 3>
262; CHECK-NEXT:    [[TMP18:%.*]] = shufflevector <2 x float> [[TMP16]], <2 x float> undef, <4 x i32> <i32 0, i32 1, i32 undef, i32 undef>
263; CHECK-NEXT:    [[RD1:%.*]] = shufflevector <4 x float> undef, <4 x float> [[TMP18]], <4 x i32> <i32 0, i32 1, i32 4, i32 5>
264; CHECK-NEXT:    ret <4 x float> [[RD1]]
265;
266  %c0 = extractelement <4 x i32> %c, i32 0
267  %c1 = extractelement <4 x i32> %c, i32 1
268  %c2 = extractelement <4 x i32> %c, i32 2
269  %c3 = extractelement <4 x i32> %c, i32 3
270  %a0 = extractelement <4 x float> %a, i32 0
271  %a1 = extractelement <4 x float> %a, i32 1
272  %a2 = extractelement <4 x float> %a, i32 2
273  %a3 = extractelement <4 x float> %a, i32 3
274  %b0 = extractelement <4 x float> %b, i32 0
275  %b1 = extractelement <4 x float> %b, i32 1
276  %b2 = extractelement <4 x float> %b, i32 2
277  %b3 = extractelement <4 x float> %b, i32 3
278  %cmp0 = icmp ne i32 %c0, 0
279  %cmp1 = icmp ne i32 %c1, 0
280  %cmp2 = icmp ne i32 %c2, 0
281  %cmp3 = icmp ne i32 %c3, 0
282  %s0 = select i1 %cmp0, float %a0, float %b0
283  %s1 = select i1 %cmp1, float %a1, float %b1
284  %s2 = select i1 %cmp2, float %a2, float %b2
285  %s3 = select i1 %cmp3, float %a3, float %b3
286  %ra = insertelement <4 x float> undef, float %s0, i32 0
287  %rb = insertelement <4 x float> %ra, float %s1, i32 1
  ; Starts again from undef rather than %rb, leaving %rb without users.
288  %rc = insertelement <4 x float> undef, float %s2, i32 2
289  %rd = insertelement <4 x float> %rc, float %s3, i32 3
290  ret <4 x float> %rd
291}
292
293; Make sure an infinite loop doesn't happen, which I ran into when trying
294; to do this backwards.
; The input only extracts each lane of %c and reinserts it at the same
; index; the expected output leaves those instructions unchanged.
295define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
296; CHECK-LABEL: @reconstruct(
297; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
298; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
299; CHECK-NEXT:    [[C2:%.*]] = extractelement <4 x i32> [[C]], i32 2
300; CHECK-NEXT:    [[C3:%.*]] = extractelement <4 x i32> [[C]], i32 3
301; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[C0]], i32 0
302; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[C1]], i32 1
303; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[C2]], i32 2
304; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[C3]], i32 3
305; CHECK-NEXT:    ret <4 x i32> [[RD]]
306;
307  %c0 = extractelement <4 x i32> %c, i32 0
308  %c1 = extractelement <4 x i32> %c, i32 1
309  %c2 = extractelement <4 x i32> %c, i32 2
310  %c3 = extractelement <4 x i32> %c, i32 3
311  %ra = insertelement <4 x i32> undef, i32 %c0, i32 0
312  %rb = insertelement <4 x i32> %ra, i32 %c1, i32 1
313  %rc = insertelement <4 x i32> %rb, i32 %c2, i32 2
314  %rd = insertelement <4 x i32> %rc, i32 %c3, i32 3
315  ret <4 x i32> %rd
316}
317
; Two-lane variant of @simple_select: the same extract / compare-with-zero /
; select / insert pattern on <2 x> operands. The expected output is one
; <2 x i32> icmp and one <2 x float> select.
318define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 {
319; CHECK-LABEL: @simple_select_v2(
320; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> [[C:%.*]], zeroinitializer
321; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> [[A:%.*]], <2 x float> [[B:%.*]]
322; CHECK-NEXT:    ret <2 x float> [[TMP2]]
323;
324  %c0 = extractelement <2 x i32> %c, i32 0
325  %c1 = extractelement <2 x i32> %c, i32 1
326  %a0 = extractelement <2 x float> %a, i32 0
327  %a1 = extractelement <2 x float> %a, i32 1
328  %b0 = extractelement <2 x float> %b, i32 0
329  %b1 = extractelement <2 x float> %b, i32 1
330  %cmp0 = icmp ne i32 %c0, 0
331  %cmp1 = icmp ne i32 %c1, 0
332  %s0 = select i1 %cmp0, float %a0, float %b0
333  %s1 = select i1 %cmp1, float %a1, float %b1
334  %ra = insertelement <2 x float> undef, float %s0, i32 0
335  %rb = insertelement <2 x float> %ra, float %s1, i32 1
336  ret <2 x float> %rb
337}
338
339; Make sure that when we construct partial vectors, we don't keep
340; re-visiting the insertelement chains starting with undef
341; (a low cost threshold is needed to force this to happen).
; The input already holds <2 x> icmp/select operations whose two result
; lanes are extracted and inserted into lanes 0 and 1 of a <4 x float>.
; The expected output keeps the input instructions as written.
342define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
343; CHECK-LABEL: @simple_select_partial_vector(
344; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> [[C:%.*]], i32 0
345; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> [[C]], i32 1
346; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
347; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> [[A]], i32 1
348; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
349; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x float> [[B]], i32 1
350; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
351; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
352; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
353; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0
354; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1
355; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0
356; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1
357; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]]
358; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
359; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i32 0
360; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
361; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP10]], i32 1
362; CHECK-NEXT:    ret <4 x float> [[RB]]
363;
364  %c0 = extractelement <4 x i32> %c, i32 0
365  %c1 = extractelement <4 x i32> %c, i32 1
366  %a0 = extractelement <4 x float> %a, i32 0
367  %a1 = extractelement <4 x float> %a, i32 1
368  %b0 = extractelement <4 x float> %b, i32 0
369  %b1 = extractelement <4 x float> %b, i32 1
370  %1 = insertelement <2 x i32> undef, i32 %c0, i32 0
371  %2 = insertelement <2 x i32> %1, i32 %c1, i32 1
372  %3 = icmp ne <2 x i32> %2, zeroinitializer
373  %4 = insertelement <2 x float> undef, float %a0, i32 0
374  %5 = insertelement <2 x float> %4, float %a1, i32 1
375  %6 = insertelement <2 x float> undef, float %b0, i32 0
376  %7 = insertelement <2 x float> %6, float %b1, i32 1
377  %8 = select <2 x i1> %3, <2 x float> %5, <2 x float> %7
378  %9 = extractelement <2 x float> %8, i32 0
379  %ra = insertelement <4 x float> undef, float %9, i32 0
380  %10 = extractelement <2 x float> %8, i32 1
381  %rb = insertelement <4 x float> %ra, float %10, i32 1
382  ret <4 x float> %rb
383}
384
385; Make sure that vectorization happens even if insertelement operations
386; must be rescheduled. The case here is from compiling Julia.
; Each lane's extract, fadd and insert are interleaved with the next lane's
; rather than grouped by opcode. The expected output is a single
; <4 x float> fadd of %a and %b.
387define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
388; CHECK-LABEL: @reschedule_extract(
389; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
390; CHECK-NEXT:    ret <4 x float> [[TMP1]]
391;
392  %a0 = extractelement <4 x float> %a, i32 0
393  %b0 = extractelement <4 x float> %b, i32 0
394  %c0 = fadd float %a0, %b0
395  %v0 = insertelement <4 x float> undef, float %c0, i32 0
396  %a1 = extractelement <4 x float> %a, i32 1
397  %b1 = extractelement <4 x float> %b, i32 1
398  %c1 = fadd float %a1, %b1
399  %v1 = insertelement <4 x float> %v0, float %c1, i32 1
400  %a2 = extractelement <4 x float> %a, i32 2
401  %b2 = extractelement <4 x float> %b, i32 2
402  %c2 = fadd float %a2, %b2
403  %v2 = insertelement <4 x float> %v1, float %c2, i32 2
404  %a3 = extractelement <4 x float> %a, i32 3
405  %b3 = extractelement <4 x float> %b, i32 3
406  %c3 = fadd float %a3, %b3
407  %v3 = insertelement <4 x float> %v2, float %c3, i32 3
408  ret <4 x float> %v3
409}
410
411; Check that the cost model for vectorization takes credit for
412; instructions that are erased.
; All four scalar fadds are computed first and the insertelement chain
; follows. The expected output is a single <4 x float> fadd of %a and %b,
; with the extracts and inserts gone.
413define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
414; CHECK-LABEL: @take_credit(
415; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> [[A:%.*]], [[B:%.*]]
416; CHECK-NEXT:    ret <4 x float> [[TMP1]]
417;
418  %a0 = extractelement <4 x float> %a, i32 0
419  %b0 = extractelement <4 x float> %b, i32 0
420  %c0 = fadd float %a0, %b0
421  %a1 = extractelement <4 x float> %a, i32 1
422  %b1 = extractelement <4 x float> %b, i32 1
423  %c1 = fadd float %a1, %b1
424  %a2 = extractelement <4 x float> %a, i32 2
425  %b2 = extractelement <4 x float> %b, i32 2
426  %c2 = fadd float %a2, %b2
427  %a3 = extractelement <4 x float> %a, i32 3
428  %b3 = extractelement <4 x float> %b, i32 3
429  %c3 = fadd float %a3, %b3
430  %v0 = insertelement <4 x float> undef, float %c0, i32 0
431  %v1 = insertelement <4 x float> %v0, float %c1, i32 1
432  %v2 = insertelement <4 x float> %v1, float %c2, i32 2
433  %v3 = insertelement <4 x float> %v2, float %c3, i32 3
434  ret <4 x float> %v3
435}
436
437; Make sure we handle multiple trees that feed one build vector correctly.
; Each scalar argument goes through an fadd with a distinct constant and an
; fmul by 1.0, and the results are inserted in reverse lane order (%w's into
; lane 3 down to %z's into lane 0). The expected output builds the vector
; <z, y, x, w> and applies one vector fadd and one vector fmul.
438define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
439; CHECK-LABEL: @multi_tree(
440; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> poison, double [[Z:%.*]], i32 0
441; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double [[Y:%.*]], i32 1
442; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double [[X:%.*]], i32 2
443; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <4 x double> [[TMP3]], double [[W:%.*]], i32 3
444; CHECK-NEXT:    [[TMP5:%.*]] = fadd <4 x double> [[TMP4]], <double 3.000000e+00, double 2.000000e+00, double 1.000000e+00, double 0.000000e+00>
445; CHECK-NEXT:    [[TMP6:%.*]] = fmul <4 x double> [[TMP5]], <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>
446; CHECK-NEXT:    ret <4 x double> [[TMP6]]
447;
448  %t0 = fadd double %w , 0.000000e+00
449  %t1 = fadd double %x , 1.000000e+00
450  %t2 = fadd double %y , 2.000000e+00
451  %t3 = fadd double %z , 3.000000e+00
  ; Each fmul is immediately followed by its insert; lanes run 3, 2, 1, 0.
452  %t4 = fmul double %t0, 1.000000e+00
453  %i1 = insertelement <4 x double> undef, double %t4, i32 3
454  %t5 = fmul double %t1, 1.000000e+00
455  %i2 = insertelement <4 x double> %i1, double %t5, i32 2
456  %t6 = fmul double %t2, 1.000000e+00
457  %i3 = insertelement <4 x double> %i2, double %t6, i32 1
458  %t7 = fmul double %t3, 1.000000e+00
459  %i4 = insertelement <4 x double> %i3, double %t7, i32 0
460  ret <4 x double> %i4
461}
462
; Eight-lane element-wise add: every lane of %a and %b is extracted, added
; as a scalar float, and inserted back at the same index. The expected
; output is a single <8 x float> fadd of %a and %b.
463define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 {
464; CHECK-LABEL: @_vadd256(
465; CHECK-NEXT:    [[TMP1:%.*]] = fadd <8 x float> [[A:%.*]], [[B:%.*]]
466; CHECK-NEXT:    ret <8 x float> [[TMP1]]
467;
468  %vecext = extractelement <8 x float> %a, i32 0
469  %vecext1 = extractelement <8 x float> %b, i32 0
470  %add = fadd float %vecext, %vecext1
471  %vecext2 = extractelement <8 x float> %a, i32 1
472  %vecext3 = extractelement <8 x float> %b, i32 1
473  %add4 = fadd float %vecext2, %vecext3
474  %vecext5 = extractelement <8 x float> %a, i32 2
475  %vecext6 = extractelement <8 x float> %b, i32 2
476  %add7 = fadd float %vecext5, %vecext6
477  %vecext8 = extractelement <8 x float> %a, i32 3
478  %vecext9 = extractelement <8 x float> %b, i32 3
479  %add10 = fadd float %vecext8, %vecext9
480  %vecext11 = extractelement <8 x float> %a, i32 4
481  %vecext12 = extractelement <8 x float> %b, i32 4
482  %add13 = fadd float %vecext11, %vecext12
483  %vecext14 = extractelement <8 x float> %a, i32 5
484  %vecext15 = extractelement <8 x float> %b, i32 5
485  %add16 = fadd float %vecext14, %vecext15
486  %vecext17 = extractelement <8 x float> %a, i32 6
487  %vecext18 = extractelement <8 x float> %b, i32 6
488  %add19 = fadd float %vecext17, %vecext18
489  %vecext20 = extractelement <8 x float> %a, i32 7
490  %vecext21 = extractelement <8 x float> %b, i32 7
491  %add22 = fadd float %vecext20, %vecext21
492  %vecinit.i = insertelement <8 x float> undef, float %add, i32 0
493  %vecinit1.i = insertelement <8 x float> %vecinit.i, float %add4, i32 1
494  %vecinit2.i = insertelement <8 x float> %vecinit1.i, float %add7, i32 2
495  %vecinit3.i = insertelement <8 x float> %vecinit2.i, float %add10, i32 3
496  %vecinit4.i = insertelement <8 x float> %vecinit3.i, float %add13, i32 4
497  %vecinit5.i = insertelement <8 x float> %vecinit4.i, float %add16, i32 5
498  %vecinit6.i = insertelement <8 x float> %vecinit5.i, float %add19, i32 6
499  %vecinit7.i = insertelement <8 x float> %vecinit6.i, float %add22, i32 7
500  ret <8 x float> %vecinit7.i
501}
502
; Attribute group #0, applied to every definition and declaration in this
; file that is tagged with #0.
503attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "frame-pointer"="all" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
504