1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -S -slp-vectorizer -slp-threshold=-10000 < %s | FileCheck %s
3; RUN: opt -S -slp-vectorizer -slp-threshold=0 < %s | FileCheck %s --check-prefix=ZEROTHRESH
4target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-n8:16:32:64-S128"
5
6target triple = "x86_64-apple-macosx10.8.0"
7
; Input: per-lane scalar code that extracts every element of %a, %b and %c,
; computes select(%cN != 0, %aN, %bN) for N = 0..3, and rebuilds a
; <4 x float> through an insertelement chain that is then returned.
; Expected output (identical for both check prefixes): a single
; <4 x i32> icmp ne and a single <4 x float> select, followed by
; extractelement/insertelement pairs that rebuild the returned vector.
8define <4 x float> @simple_select(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
9; CHECK-LABEL: @simple_select(
10; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
11; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
12; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
13; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
14; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
15; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
16; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
17; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 2
18; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
19; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
20; CHECK-NEXT:    ret <4 x float> [[RD]]
21;
22; ZEROTHRESH-LABEL: @simple_select(
23; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
24; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
25; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
26; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
27; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
28; ZEROTHRESH-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
29; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
30; ZEROTHRESH-NEXT:    [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 2
31; ZEROTHRESH-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
32; ZEROTHRESH-NEXT:    [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
33; ZEROTHRESH-NEXT:    ret <4 x float> [[RD]]
34;
35  %c0 = extractelement <4 x i32> %c, i32 0
36  %c1 = extractelement <4 x i32> %c, i32 1
37  %c2 = extractelement <4 x i32> %c, i32 2
38  %c3 = extractelement <4 x i32> %c, i32 3
39  %a0 = extractelement <4 x float> %a, i32 0
40  %a1 = extractelement <4 x float> %a, i32 1
41  %a2 = extractelement <4 x float> %a, i32 2
42  %a3 = extractelement <4 x float> %a, i32 3
43  %b0 = extractelement <4 x float> %b, i32 0
44  %b1 = extractelement <4 x float> %b, i32 1
45  %b2 = extractelement <4 x float> %b, i32 2
46  %b3 = extractelement <4 x float> %b, i32 3
47  %cmp0 = icmp ne i32 %c0, 0
48  %cmp1 = icmp ne i32 %c1, 0
49  %cmp2 = icmp ne i32 %c2, 0
50  %cmp3 = icmp ne i32 %c3, 0
51  %s0 = select i1 %cmp0, float %a0, float %b0
52  %s1 = select i1 %cmp1, float %a1, float %b1
53  %s2 = select i1 %cmp2, float %a2, float %b2
54  %s3 = select i1 %cmp3, float %a3, float %b3
55  %ra = insertelement <4 x float> undef, float %s0, i32 0
56  %rb = insertelement <4 x float> %ra, float %s1, i32 1
57  %rc = insertelement <4 x float> %rb, float %s2, i32 2
58  %rd = insertelement <4 x float> %rc, float %s3, i32 3
59  ret <4 x float> %rd
60}
61
62declare void @llvm.assume(i1) nounwind
63
64; This entire tree is ephemeral, don't vectorize any of it.
; Input: the same per-lane compare/select/insertelement tree as a plain
; 4-wide select, but the rebuilt vector %rd is only read back lane by lane,
; summed, compared, and passed to @llvm.assume; the function returns undef.
; Expected output (identical for both check prefixes): the scalar
; instructions are left exactly as written, with no vector icmp/select.
65define <4 x float> @simple_select_eph(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
66; CHECK-LABEL: @simple_select_eph(
67; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
68; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
69; CHECK-NEXT:    [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
70; CHECK-NEXT:    [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
71; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> %a, i32 0
72; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> %a, i32 1
73; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x float> %a, i32 2
74; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x float> %a, i32 3
75; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x float> %b, i32 0
76; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x float> %b, i32 1
77; CHECK-NEXT:    [[B2:%.*]] = extractelement <4 x float> %b, i32 2
78; CHECK-NEXT:    [[B3:%.*]] = extractelement <4 x float> %b, i32 3
79; CHECK-NEXT:    [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
80; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
81; CHECK-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
82; CHECK-NEXT:    [[CMP3:%.*]] = icmp ne i32 [[C3]], 0
83; CHECK-NEXT:    [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
84; CHECK-NEXT:    [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]]
85; CHECK-NEXT:    [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]]
86; CHECK-NEXT:    [[S3:%.*]] = select i1 [[CMP3]], float [[A3]], float [[B3]]
87; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0
88; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1
89; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[S2]], i32 2
90; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3
91; CHECK-NEXT:    [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0
92; CHECK-NEXT:    [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1
93; CHECK-NEXT:    [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2
94; CHECK-NEXT:    [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3
95; CHECK-NEXT:    [[Q4:%.*]] = fadd float [[Q0]], [[Q1]]
96; CHECK-NEXT:    [[Q5:%.*]] = fadd float [[Q2]], [[Q3]]
97; CHECK-NEXT:    [[Q6:%.*]] = fadd float [[Q4]], [[Q5]]
98; CHECK-NEXT:    [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]]
99; CHECK-NEXT:    call void @llvm.assume(i1 [[QI]])
100; CHECK-NEXT:    ret <4 x float> undef
101;
102; ZEROTHRESH-LABEL: @simple_select_eph(
103; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
104; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
105; ZEROTHRESH-NEXT:    [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
106; ZEROTHRESH-NEXT:    [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
107; ZEROTHRESH-NEXT:    [[A0:%.*]] = extractelement <4 x float> %a, i32 0
108; ZEROTHRESH-NEXT:    [[A1:%.*]] = extractelement <4 x float> %a, i32 1
109; ZEROTHRESH-NEXT:    [[A2:%.*]] = extractelement <4 x float> %a, i32 2
110; ZEROTHRESH-NEXT:    [[A3:%.*]] = extractelement <4 x float> %a, i32 3
111; ZEROTHRESH-NEXT:    [[B0:%.*]] = extractelement <4 x float> %b, i32 0
112; ZEROTHRESH-NEXT:    [[B1:%.*]] = extractelement <4 x float> %b, i32 1
113; ZEROTHRESH-NEXT:    [[B2:%.*]] = extractelement <4 x float> %b, i32 2
114; ZEROTHRESH-NEXT:    [[B3:%.*]] = extractelement <4 x float> %b, i32 3
115; ZEROTHRESH-NEXT:    [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
116; ZEROTHRESH-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
117; ZEROTHRESH-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
118; ZEROTHRESH-NEXT:    [[CMP3:%.*]] = icmp ne i32 [[C3]], 0
119; ZEROTHRESH-NEXT:    [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
120; ZEROTHRESH-NEXT:    [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]]
121; ZEROTHRESH-NEXT:    [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]]
122; ZEROTHRESH-NEXT:    [[S3:%.*]] = select i1 [[CMP3]], float [[A3]], float [[B3]]
123; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0
124; ZEROTHRESH-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1
125; ZEROTHRESH-NEXT:    [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[S2]], i32 2
126; ZEROTHRESH-NEXT:    [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3
127; ZEROTHRESH-NEXT:    [[Q0:%.*]] = extractelement <4 x float> [[RD]], i32 0
128; ZEROTHRESH-NEXT:    [[Q1:%.*]] = extractelement <4 x float> [[RD]], i32 1
129; ZEROTHRESH-NEXT:    [[Q2:%.*]] = extractelement <4 x float> [[RD]], i32 2
130; ZEROTHRESH-NEXT:    [[Q3:%.*]] = extractelement <4 x float> [[RD]], i32 3
131; ZEROTHRESH-NEXT:    [[Q4:%.*]] = fadd float [[Q0]], [[Q1]]
132; ZEROTHRESH-NEXT:    [[Q5:%.*]] = fadd float [[Q2]], [[Q3]]
133; ZEROTHRESH-NEXT:    [[Q6:%.*]] = fadd float [[Q4]], [[Q5]]
134; ZEROTHRESH-NEXT:    [[QI:%.*]] = fcmp olt float [[Q6]], [[Q5]]
135; ZEROTHRESH-NEXT:    call void @llvm.assume(i1 [[QI]])
136; ZEROTHRESH-NEXT:    ret <4 x float> undef
137;
138  %c0 = extractelement <4 x i32> %c, i32 0
139  %c1 = extractelement <4 x i32> %c, i32 1
140  %c2 = extractelement <4 x i32> %c, i32 2
141  %c3 = extractelement <4 x i32> %c, i32 3
142  %a0 = extractelement <4 x float> %a, i32 0
143  %a1 = extractelement <4 x float> %a, i32 1
144  %a2 = extractelement <4 x float> %a, i32 2
145  %a3 = extractelement <4 x float> %a, i32 3
146  %b0 = extractelement <4 x float> %b, i32 0
147  %b1 = extractelement <4 x float> %b, i32 1
148  %b2 = extractelement <4 x float> %b, i32 2
149  %b3 = extractelement <4 x float> %b, i32 3
150  %cmp0 = icmp ne i32 %c0, 0
151  %cmp1 = icmp ne i32 %c1, 0
152  %cmp2 = icmp ne i32 %c2, 0
153  %cmp3 = icmp ne i32 %c3, 0
154  %s0 = select i1 %cmp0, float %a0, float %b0
155  %s1 = select i1 %cmp1, float %a1, float %b1
156  %s2 = select i1 %cmp2, float %a2, float %b2
157  %s3 = select i1 %cmp3, float %a3, float %b3
158  %ra = insertelement <4 x float> undef, float %s0, i32 0
159  %rb = insertelement <4 x float> %ra, float %s1, i32 1
160  %rc = insertelement <4 x float> %rb, float %s2, i32 2
161  %rd = insertelement <4 x float> %rc, float %s3, i32 3
  ; From here on, %rd is only read back to compute the i1 handed to
  ; @llvm.assume; nothing derived from it reaches the return value.
162  %q0 = extractelement <4 x float> %rd, i32 0
163  %q1 = extractelement <4 x float> %rd, i32 1
164  %q2 = extractelement <4 x float> %rd, i32 2
165  %q3 = extractelement <4 x float> %rd, i32 3
166  %q4 = fadd float %q0, %q1
167  %q5 = fadd float %q2, %q3
168  %q6 = fadd float %q4, %q5
169  %qi = fcmp olt float %q6, %q5
170  call void @llvm.assume(i1 %qi)
171  ret <4 x float> undef
172}
173
174; Insert in an order different from the vector indices to make sure it
175; doesn't matter
; Input: per-lane compare/select as in a plain 4-wide select, but the
; insertelement chain writes %s0 to index 2 and %s2 to index 0.
; Expected output (identical for both check prefixes): a single vector
; icmp ne and select; the trailing extractelement indices run 0,1,2,3 while
; the insertelement indices keep the input's 2,1,0,3 order.
176define <4 x float> @simple_select_insert_out_of_order(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
177; CHECK-LABEL: @simple_select_insert_out_of_order(
178; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
179; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
180; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
181; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
182; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
183; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
184; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
185; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 0
186; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
187; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
188; CHECK-NEXT:    ret <4 x float> [[RD]]
189;
190; ZEROTHRESH-LABEL: @simple_select_insert_out_of_order(
191; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
192; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
193; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
194; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 2
195; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
196; ZEROTHRESH-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
197; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
198; ZEROTHRESH-NEXT:    [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 0
199; ZEROTHRESH-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
200; ZEROTHRESH-NEXT:    [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
201; ZEROTHRESH-NEXT:    ret <4 x float> [[RD]]
202;
203  %c0 = extractelement <4 x i32> %c, i32 0
204  %c1 = extractelement <4 x i32> %c, i32 1
205  %c2 = extractelement <4 x i32> %c, i32 2
206  %c3 = extractelement <4 x i32> %c, i32 3
207  %a0 = extractelement <4 x float> %a, i32 0
208  %a1 = extractelement <4 x float> %a, i32 1
209  %a2 = extractelement <4 x float> %a, i32 2
210  %a3 = extractelement <4 x float> %a, i32 3
211  %b0 = extractelement <4 x float> %b, i32 0
212  %b1 = extractelement <4 x float> %b, i32 1
213  %b2 = extractelement <4 x float> %b, i32 2
214  %b3 = extractelement <4 x float> %b, i32 3
215  %cmp0 = icmp ne i32 %c0, 0
216  %cmp1 = icmp ne i32 %c1, 0
217  %cmp2 = icmp ne i32 %c2, 0
218  %cmp3 = icmp ne i32 %c3, 0
219  %s0 = select i1 %cmp0, float %a0, float %b0
220  %s1 = select i1 %cmp1, float %a1, float %b1
221  %s2 = select i1 %cmp2, float %a2, float %b2
222  %s3 = select i1 %cmp3, float %a3, float %b3
  ; Destination indices are deliberately 2,1,0,3 rather than 0,1,2,3.
223  %ra = insertelement <4 x float> undef, float %s0, i32 2
224  %rb = insertelement <4 x float> %ra, float %s1, i32 1
225  %rc = insertelement <4 x float> %rb, float %s2, i32 0
226  %rd = insertelement <4 x float> %rc, float %s3, i32 3
227  ret <4 x float> %rd
228}
229
230declare void @v4f32_user(<4 x float>) #0
231declare void @f32_user(float) #0
232
233; Multiple users of the final constructed vector
; Input: per-lane compare/select rebuilt into %rd, where %rd is both passed
; to @v4f32_user and returned.
; Expected output (identical for both check prefixes): a single vector
; icmp ne and select, the extract/insert pairs rebuilding %rd, then the call
; and the return both using the rebuilt vector.
234define <4 x float> @simple_select_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
235; CHECK-LABEL: @simple_select_users(
236; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
237; CHECK-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
238; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
239; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
240; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
241; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
242; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
243; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 2
244; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
245; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
246; CHECK-NEXT:    call void @v4f32_user(<4 x float> [[RD]]) #0
247; CHECK-NEXT:    ret <4 x float> [[RD]]
248;
249; ZEROTHRESH-LABEL: @simple_select_users(
250; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = icmp ne <4 x i32> %c, zeroinitializer
251; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = select <4 x i1> [[TMP1]], <4 x float> %a, <4 x float> %b
252; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP2]], i32 0
253; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP3]], i32 0
254; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP2]], i32 1
255; ZEROTHRESH-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP4]], i32 1
256; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP2]], i32 2
257; ZEROTHRESH-NEXT:    [[RC:%.*]] = insertelement <4 x float> [[RB]], float [[TMP5]], i32 2
258; ZEROTHRESH-NEXT:    [[TMP6:%.*]] = extractelement <4 x float> [[TMP2]], i32 3
259; ZEROTHRESH-NEXT:    [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP6]], i32 3
260; ZEROTHRESH-NEXT:    call void @v4f32_user(<4 x float> [[RD]]) #0
261; ZEROTHRESH-NEXT:    ret <4 x float> [[RD]]
262;
263  %c0 = extractelement <4 x i32> %c, i32 0
264  %c1 = extractelement <4 x i32> %c, i32 1
265  %c2 = extractelement <4 x i32> %c, i32 2
266  %c3 = extractelement <4 x i32> %c, i32 3
267  %a0 = extractelement <4 x float> %a, i32 0
268  %a1 = extractelement <4 x float> %a, i32 1
269  %a2 = extractelement <4 x float> %a, i32 2
270  %a3 = extractelement <4 x float> %a, i32 3
271  %b0 = extractelement <4 x float> %b, i32 0
272  %b1 = extractelement <4 x float> %b, i32 1
273  %b2 = extractelement <4 x float> %b, i32 2
274  %b3 = extractelement <4 x float> %b, i32 3
275  %cmp0 = icmp ne i32 %c0, 0
276  %cmp1 = icmp ne i32 %c1, 0
277  %cmp2 = icmp ne i32 %c2, 0
278  %cmp3 = icmp ne i32 %c3, 0
279  %s0 = select i1 %cmp0, float %a0, float %b0
280  %s1 = select i1 %cmp1, float %a1, float %b1
281  %s2 = select i1 %cmp2, float %a2, float %b2
282  %s3 = select i1 %cmp3, float %a3, float %b3
283  %ra = insertelement <4 x float> undef, float %s0, i32 0
284  %rb = insertelement <4 x float> %ra, float %s1, i32 1
285  %rc = insertelement <4 x float> %rb, float %s2, i32 2
286  %rd = insertelement <4 x float> %rc, float %s3, i32 3
  ; %rd has two users: the call below and the return.
287  call void @v4f32_user(<4 x float> %rd) #0
288  ret <4 x float> %rd
289}
290
291; Unused insertelement
; Input: per-lane compare/select, but the insertelement chain is split:
; %ra/%rb hold lanes 0 and 1 and are never used afterwards, while %rc
; starts again from undef and only lanes 2 and 3 reach the returned %rd.
; Expected output with -slp-threshold=-10000: lanes 2 and 3 become
; <2 x i32>/<2 x float> icmp ne and select, lanes 0 and 1 stay scalar.
; Expected output with -slp-threshold=0: all scalar code is left as written.
292define <4 x float> @simple_select_no_users(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
293; CHECK-LABEL: @simple_select_no_users(
294; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
295; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
296; CHECK-NEXT:    [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
297; CHECK-NEXT:    [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
298; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> %a, i32 0
299; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> %a, i32 1
300; CHECK-NEXT:    [[A2:%.*]] = extractelement <4 x float> %a, i32 2
301; CHECK-NEXT:    [[A3:%.*]] = extractelement <4 x float> %a, i32 3
302; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x float> %b, i32 0
303; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x float> %b, i32 1
304; CHECK-NEXT:    [[B2:%.*]] = extractelement <4 x float> %b, i32 2
305; CHECK-NEXT:    [[B3:%.*]] = extractelement <4 x float> %b, i32 3
306; CHECK-NEXT:    [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
307; CHECK-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
308; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C2]], i32 0
309; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C3]], i32 1
310; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
311; CHECK-NEXT:    [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
312; CHECK-NEXT:    [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]]
313; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> undef, float [[A2]], i32 0
314; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A3]], i32 1
315; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x float> undef, float [[B2]], i32 0
316; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B3]], i32 1
317; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]]
318; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0
319; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1
320; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
321; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i32 2
322; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
323; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[TMP10]], i32 3
324; CHECK-NEXT:    ret <4 x float> [[RD]]
325;
326; ZEROTHRESH-LABEL: @simple_select_no_users(
327; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
328; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
329; ZEROTHRESH-NEXT:    [[C2:%.*]] = extractelement <4 x i32> %c, i32 2
330; ZEROTHRESH-NEXT:    [[C3:%.*]] = extractelement <4 x i32> %c, i32 3
331; ZEROTHRESH-NEXT:    [[A0:%.*]] = extractelement <4 x float> %a, i32 0
332; ZEROTHRESH-NEXT:    [[A1:%.*]] = extractelement <4 x float> %a, i32 1
333; ZEROTHRESH-NEXT:    [[A2:%.*]] = extractelement <4 x float> %a, i32 2
334; ZEROTHRESH-NEXT:    [[A3:%.*]] = extractelement <4 x float> %a, i32 3
335; ZEROTHRESH-NEXT:    [[B0:%.*]] = extractelement <4 x float> %b, i32 0
336; ZEROTHRESH-NEXT:    [[B1:%.*]] = extractelement <4 x float> %b, i32 1
337; ZEROTHRESH-NEXT:    [[B2:%.*]] = extractelement <4 x float> %b, i32 2
338; ZEROTHRESH-NEXT:    [[B3:%.*]] = extractelement <4 x float> %b, i32 3
339; ZEROTHRESH-NEXT:    [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
340; ZEROTHRESH-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
341; ZEROTHRESH-NEXT:    [[CMP2:%.*]] = icmp ne i32 [[C2]], 0
342; ZEROTHRESH-NEXT:    [[CMP3:%.*]] = icmp ne i32 [[C3]], 0
343; ZEROTHRESH-NEXT:    [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
344; ZEROTHRESH-NEXT:    [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]]
345; ZEROTHRESH-NEXT:    [[S2:%.*]] = select i1 [[CMP2]], float [[A2]], float [[B2]]
346; ZEROTHRESH-NEXT:    [[S3:%.*]] = select i1 [[CMP3]], float [[A3]], float [[B3]]
347; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[S0]], i32 0
348; ZEROTHRESH-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[S1]], i32 1
349; ZEROTHRESH-NEXT:    [[RC:%.*]] = insertelement <4 x float> undef, float [[S2]], i32 2
350; ZEROTHRESH-NEXT:    [[RD:%.*]] = insertelement <4 x float> [[RC]], float [[S3]], i32 3
351; ZEROTHRESH-NEXT:    ret <4 x float> [[RD]]
352;
353  %c0 = extractelement <4 x i32> %c, i32 0
354  %c1 = extractelement <4 x i32> %c, i32 1
355  %c2 = extractelement <4 x i32> %c, i32 2
356  %c3 = extractelement <4 x i32> %c, i32 3
357  %a0 = extractelement <4 x float> %a, i32 0
358  %a1 = extractelement <4 x float> %a, i32 1
359  %a2 = extractelement <4 x float> %a, i32 2
360  %a3 = extractelement <4 x float> %a, i32 3
361  %b0 = extractelement <4 x float> %b, i32 0
362  %b1 = extractelement <4 x float> %b, i32 1
363  %b2 = extractelement <4 x float> %b, i32 2
364  %b3 = extractelement <4 x float> %b, i32 3
365  %cmp0 = icmp ne i32 %c0, 0
366  %cmp1 = icmp ne i32 %c1, 0
367  %cmp2 = icmp ne i32 %c2, 0
368  %cmp3 = icmp ne i32 %c3, 0
369  %s0 = select i1 %cmp0, float %a0, float %b0
370  %s1 = select i1 %cmp1, float %a1, float %b1
371  %s2 = select i1 %cmp2, float %a2, float %b2
372  %s3 = select i1 %cmp3, float %a3, float %b3
373  %ra = insertelement <4 x float> undef, float %s0, i32 0
374  %rb = insertelement <4 x float> %ra, float %s1, i32 1
  ; %rc restarts from undef instead of continuing from %rb, which leaves the
  ; %ra/%rb chain above without any user.
375  %rc = insertelement <4 x float> undef, float %s2, i32 2
376  %rd = insertelement <4 x float> %rc, float %s3, i32 3
377  ret <4 x float> %rd
378}
379
380; Make sure infinite loop doesn't happen which I ran into when trying
381; to do this backwards
; Input: extracts the four lanes of %c and reinserts each one at its original
; index, with no arithmetic in between.
; Expected output (identical for both check prefixes): only
; extractelement/insertelement pairs, each extract placed directly before the
; insert that consumes it; no other instructions appear.
382define <4 x i32> @reconstruct(<4 x i32> %c) #0 {
383; CHECK-LABEL: @reconstruct(
384; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0
385; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
386; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1
387; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
388; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
389; CHECK-NEXT:    [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
390; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3
391; CHECK-NEXT:    [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
392; CHECK-NEXT:    ret <4 x i32> [[RD]]
393;
394; ZEROTHRESH-LABEL: @reconstruct(
395; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> %c, i32 0
396; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x i32> undef, i32 [[TMP1]], i32 0
397; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> %c, i32 1
398; ZEROTHRESH-NEXT:    [[RB:%.*]] = insertelement <4 x i32> [[RA]], i32 [[TMP2]], i32 1
399; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x i32> %c, i32 2
400; ZEROTHRESH-NEXT:    [[RC:%.*]] = insertelement <4 x i32> [[RB]], i32 [[TMP3]], i32 2
401; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <4 x i32> %c, i32 3
402; ZEROTHRESH-NEXT:    [[RD:%.*]] = insertelement <4 x i32> [[RC]], i32 [[TMP4]], i32 3
403; ZEROTHRESH-NEXT:    ret <4 x i32> [[RD]]
404;
405  %c0 = extractelement <4 x i32> %c, i32 0
406  %c1 = extractelement <4 x i32> %c, i32 1
407  %c2 = extractelement <4 x i32> %c, i32 2
408  %c3 = extractelement <4 x i32> %c, i32 3
409  %ra = insertelement <4 x i32> undef, i32 %c0, i32 0
410  %rb = insertelement <4 x i32> %ra, i32 %c1, i32 1
411  %rc = insertelement <4 x i32> %rb, i32 %c2, i32 2
412  %rd = insertelement <4 x i32> %rc, i32 %c3, i32 3
413  ret <4 x i32> %rd
414}
415
; Two-lane variant of the per-lane compare/select/insertelement pattern.
; Expected output with -slp-threshold=-10000: a single <2 x i32> icmp ne and
; <2 x float> select, followed by extract/insert pairs.
; Expected output with -slp-threshold=0: the scalar code is left as written.
416define <2 x float> @simple_select_v2(<2 x float> %a, <2 x float> %b, <2 x i32> %c) #0 {
417; CHECK-LABEL: @simple_select_v2(
418; CHECK-NEXT:    [[TMP1:%.*]] = icmp ne <2 x i32> %c, zeroinitializer
419; CHECK-NEXT:    [[TMP2:%.*]] = select <2 x i1> [[TMP1]], <2 x float> %a, <2 x float> %b
420; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <2 x float> [[TMP2]], i32 0
421; CHECK-NEXT:    [[RA:%.*]] = insertelement <2 x float> undef, float [[TMP3]], i32 0
422; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <2 x float> [[TMP2]], i32 1
423; CHECK-NEXT:    [[RB:%.*]] = insertelement <2 x float> [[RA]], float [[TMP4]], i32 1
424; CHECK-NEXT:    ret <2 x float> [[RB]]
425;
426; ZEROTHRESH-LABEL: @simple_select_v2(
427; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <2 x i32> %c, i32 0
428; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <2 x i32> %c, i32 1
429; ZEROTHRESH-NEXT:    [[A0:%.*]] = extractelement <2 x float> %a, i32 0
430; ZEROTHRESH-NEXT:    [[A1:%.*]] = extractelement <2 x float> %a, i32 1
431; ZEROTHRESH-NEXT:    [[B0:%.*]] = extractelement <2 x float> %b, i32 0
432; ZEROTHRESH-NEXT:    [[B1:%.*]] = extractelement <2 x float> %b, i32 1
433; ZEROTHRESH-NEXT:    [[CMP0:%.*]] = icmp ne i32 [[C0]], 0
434; ZEROTHRESH-NEXT:    [[CMP1:%.*]] = icmp ne i32 [[C1]], 0
435; ZEROTHRESH-NEXT:    [[S0:%.*]] = select i1 [[CMP0]], float [[A0]], float [[B0]]
436; ZEROTHRESH-NEXT:    [[S1:%.*]] = select i1 [[CMP1]], float [[A1]], float [[B1]]
437; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <2 x float> undef, float [[S0]], i32 0
438; ZEROTHRESH-NEXT:    [[RB:%.*]] = insertelement <2 x float> [[RA]], float [[S1]], i32 1
439; ZEROTHRESH-NEXT:    ret <2 x float> [[RB]]
440;
441  %c0 = extractelement <2 x i32> %c, i32 0
442  %c1 = extractelement <2 x i32> %c, i32 1
443  %a0 = extractelement <2 x float> %a, i32 0
444  %a1 = extractelement <2 x float> %a, i32 1
445  %b0 = extractelement <2 x float> %b, i32 0
446  %b1 = extractelement <2 x float> %b, i32 1
447  %cmp0 = icmp ne i32 %c0, 0
448  %cmp1 = icmp ne i32 %c1, 0
449  %s0 = select i1 %cmp0, float %a0, float %b0
450  %s1 = select i1 %cmp1, float %a1, float %b1
451  %ra = insertelement <2 x float> undef, float %s0, i32 0
452  %rb = insertelement <2 x float> %ra, float %s1, i32 1
453  ret <2 x float> %rb
454}
455
456; Make sure when we construct partial vectors, we don't keep
457; re-visiting the insertelement chains starting with undef
458; (low cost threshold needed to force this to happen)
; Input: already contains <2 x i32>/<2 x float> vectors built from
; insertelement chains that start at undef, a <2 x> icmp ne and select, and a
; <4 x float> result in which only indices 0 and 1 are written.
; Expected output (identical for both check prefixes): the same instruction
; sequence as the input.
459define <4 x float> @simple_select_partial_vector(<4 x float> %a, <4 x float> %b, <4 x i32> %c) #0 {
460; CHECK-LABEL: @simple_select_partial_vector(
461; CHECK-NEXT:    [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
462; CHECK-NEXT:    [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
463; CHECK-NEXT:    [[A0:%.*]] = extractelement <4 x float> %a, i32 0
464; CHECK-NEXT:    [[A1:%.*]] = extractelement <4 x float> %a, i32 1
465; CHECK-NEXT:    [[B0:%.*]] = extractelement <4 x float> %b, i32 0
466; CHECK-NEXT:    [[B1:%.*]] = extractelement <4 x float> %b, i32 1
467; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
468; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
469; CHECK-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
470; CHECK-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0
471; CHECK-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1
472; CHECK-NEXT:    [[TMP6:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0
473; CHECK-NEXT:    [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1
474; CHECK-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]]
475; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
476; CHECK-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i32 0
477; CHECK-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
478; CHECK-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP10]], i32 1
479; CHECK-NEXT:    ret <4 x float> [[RB]]
480;
481; ZEROTHRESH-LABEL: @simple_select_partial_vector(
482; ZEROTHRESH-NEXT:    [[C0:%.*]] = extractelement <4 x i32> %c, i32 0
483; ZEROTHRESH-NEXT:    [[C1:%.*]] = extractelement <4 x i32> %c, i32 1
484; ZEROTHRESH-NEXT:    [[A0:%.*]] = extractelement <4 x float> %a, i32 0
485; ZEROTHRESH-NEXT:    [[A1:%.*]] = extractelement <4 x float> %a, i32 1
486; ZEROTHRESH-NEXT:    [[B0:%.*]] = extractelement <4 x float> %b, i32 0
487; ZEROTHRESH-NEXT:    [[B1:%.*]] = extractelement <4 x float> %b, i32 1
488; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = insertelement <2 x i32> undef, i32 [[C0]], i32 0
489; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = insertelement <2 x i32> [[TMP1]], i32 [[C1]], i32 1
490; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = icmp ne <2 x i32> [[TMP2]], zeroinitializer
491; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = insertelement <2 x float> undef, float [[A0]], i32 0
492; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = insertelement <2 x float> [[TMP4]], float [[A1]], i32 1
493; ZEROTHRESH-NEXT:    [[TMP6:%.*]] = insertelement <2 x float> undef, float [[B0]], i32 0
494; ZEROTHRESH-NEXT:    [[TMP7:%.*]] = insertelement <2 x float> [[TMP6]], float [[B1]], i32 1
495; ZEROTHRESH-NEXT:    [[TMP8:%.*]] = select <2 x i1> [[TMP3]], <2 x float> [[TMP5]], <2 x float> [[TMP7]]
496; ZEROTHRESH-NEXT:    [[TMP9:%.*]] = extractelement <2 x float> [[TMP8]], i32 0
497; ZEROTHRESH-NEXT:    [[RA:%.*]] = insertelement <4 x float> undef, float [[TMP9]], i32 0
498; ZEROTHRESH-NEXT:    [[TMP10:%.*]] = extractelement <2 x float> [[TMP8]], i32 1
499; ZEROTHRESH-NEXT:    [[RB:%.*]] = insertelement <4 x float> [[RA]], float [[TMP10]], i32 1
500; ZEROTHRESH-NEXT:    ret <4 x float> [[RB]]
501;
502  %c0 = extractelement <4 x i32> %c, i32 0
503  %c1 = extractelement <4 x i32> %c, i32 1
504  %a0 = extractelement <4 x float> %a, i32 0
505  %a1 = extractelement <4 x float> %a, i32 1
506  %b0 = extractelement <4 x float> %b, i32 0
507  %b1 = extractelement <4 x float> %b, i32 1
508  %1 = insertelement <2 x i32> undef, i32 %c0, i32 0
509  %2 = insertelement <2 x i32> %1, i32 %c1, i32 1
510  %3 = icmp ne <2 x i32> %2, zeroinitializer
511  %4 = insertelement <2 x float> undef, float %a0, i32 0
512  %5 = insertelement <2 x float> %4, float %a1, i32 1
513  %6 = insertelement <2 x float> undef, float %b0, i32 0
514  %7 = insertelement <2 x float> %6, float %b1, i32 1
515  %8 = select <2 x i1> %3, <2 x float> %5, <2 x float> %7
516  %9 = extractelement <2 x float> %8, i32 0
  ; Only indices 0 and 1 of the <4 x float> result are written; indices 2 and
  ; 3 keep the undef they started with.
517  %ra = insertelement <4 x float> undef, float %9, i32 0
518  %10 = extractelement <2 x float> %8, i32 1
519  %rb = insertelement <4 x float> %ra, float %10, i32 1
520  ret <4 x float> %rb
521}
522
523; Make sure that vectorization happens even if insertelement operations
524; must be rescheduled. The case here is from compiling Julia.
525define <4 x float> @reschedule_extract(<4 x float> %a, <4 x float> %b) {
526; CHECK-LABEL: @reschedule_extract(
527; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> %a, %b
528; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
529; CHECK-NEXT:    [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
530; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
531; CHECK-NEXT:    [[V1:%.*]] = insertelement <4 x float> [[V0]], float [[TMP3]], i32 1
532; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
533; CHECK-NEXT:    [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[TMP4]], i32 2
534; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
535; CHECK-NEXT:    [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[TMP5]], i32 3
536; CHECK-NEXT:    ret <4 x float> [[V3]]
537;
538; ZEROTHRESH-LABEL: @reschedule_extract(
539; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = fadd <4 x float> %a, %b
540; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
541; ZEROTHRESH-NEXT:    [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
542; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
543; ZEROTHRESH-NEXT:    [[V1:%.*]] = insertelement <4 x float> [[V0]], float [[TMP3]], i32 1
544; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
545; ZEROTHRESH-NEXT:    [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[TMP4]], i32 2
546; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
547; ZEROTHRESH-NEXT:    [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[TMP5]], i32 3
548; ZEROTHRESH-NEXT:    ret <4 x float> [[V3]]
549;
550  %a0 = extractelement <4 x float> %a, i32 0
551  %b0 = extractelement <4 x float> %b, i32 0
552  %c0 = fadd float %a0, %b0
553  %v0 = insertelement <4 x float> undef, float %c0, i32 0
554  %a1 = extractelement <4 x float> %a, i32 1
555  %b1 = extractelement <4 x float> %b, i32 1
556  %c1 = fadd float %a1, %b1
557  %v1 = insertelement <4 x float> %v0, float %c1, i32 1
558  %a2 = extractelement <4 x float> %a, i32 2
559  %b2 = extractelement <4 x float> %b, i32 2
560  %c2 = fadd float %a2, %b2
561  %v2 = insertelement <4 x float> %v1, float %c2, i32 2
562  %a3 = extractelement <4 x float> %a, i32 3
563  %b3 = extractelement <4 x float> %b, i32 3
564  %c3 = fadd float %a3, %b3
565  %v3 = insertelement <4 x float> %v2, float %c3, i32 3
566  ret <4 x float> %v3
567}
568
569; Check that the cost model for vectorization takes credit for
570; instructions that are erased.
; In the input below all four scalar extractelement/fadd groups come first and
; the insertelement chain that builds the result follows them. The expected
; output is a single <4 x float> fadd for both thresholds used by this file,
; including -slp-threshold=0.
571define <4 x float> @take_credit(<4 x float> %a, <4 x float> %b) {
572; CHECK-LABEL: @take_credit(
573; CHECK-NEXT:    [[TMP1:%.*]] = fadd <4 x float> %a, %b
574; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
575; CHECK-NEXT:    [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
576; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
577; CHECK-NEXT:    [[V1:%.*]] = insertelement <4 x float> [[V0]], float [[TMP3]], i32 1
578; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
579; CHECK-NEXT:    [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[TMP4]], i32 2
580; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
581; CHECK-NEXT:    [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[TMP5]], i32 3
582; CHECK-NEXT:    ret <4 x float> [[V3]]
583;
584; ZEROTHRESH-LABEL: @take_credit(
585; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = fadd <4 x float> %a, %b
586; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[TMP1]], i32 0
587; ZEROTHRESH-NEXT:    [[V0:%.*]] = insertelement <4 x float> undef, float [[TMP2]], i32 0
588; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <4 x float> [[TMP1]], i32 1
589; ZEROTHRESH-NEXT:    [[V1:%.*]] = insertelement <4 x float> [[V0]], float [[TMP3]], i32 1
590; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <4 x float> [[TMP1]], i32 2
591; ZEROTHRESH-NEXT:    [[V2:%.*]] = insertelement <4 x float> [[V1]], float [[TMP4]], i32 2
592; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = extractelement <4 x float> [[TMP1]], i32 3
593; ZEROTHRESH-NEXT:    [[V3:%.*]] = insertelement <4 x float> [[V2]], float [[TMP5]], i32 3
594; ZEROTHRESH-NEXT:    ret <4 x float> [[V3]]
595;
596  %a0 = extractelement <4 x float> %a, i32 0
597  %b0 = extractelement <4 x float> %b, i32 0
598  %c0 = fadd float %a0, %b0
599  %a1 = extractelement <4 x float> %a, i32 1
600  %b1 = extractelement <4 x float> %b, i32 1
601  %c1 = fadd float %a1, %b1
602  %a2 = extractelement <4 x float> %a, i32 2
603  %b2 = extractelement <4 x float> %b, i32 2
604  %c2 = fadd float %a2, %b2
605  %a3 = extractelement <4 x float> %a, i32 3
606  %b3 = extractelement <4 x float> %b, i32 3
607  %c3 = fadd float %a3, %b3
608  %v0 = insertelement <4 x float> undef, float %c0, i32 0
609  %v1 = insertelement <4 x float> %v0, float %c1, i32 1
610  %v2 = insertelement <4 x float> %v1, float %c2, i32 2
611  %v3 = insertelement <4 x float> %v2, float %c3, i32 3
612  ret <4 x float> %v3
613}
614
615; Make sure we handle multiple trees that feed one build vector correctly.
; Four independent scalar fadd -> fmul chains on %w, %x, %y, %z feed a single
; insertelement sequence that fills lanes 3, 2, 1, 0 in that order. The
; expected output packs the four arguments into one <4 x double>, performs one
; vector fadd and one vector fmul, and preserves the reversed lane placement
; through extractelement/insertelement pairs (lane 0 -> 3, 1 -> 2, 2 -> 1,
; 3 -> 0).
616define <4 x double> @multi_tree(double %w, double %x, double %y, double %z) {
617; CHECK-LABEL: @multi_tree(
618; CHECK-NEXT:  entry:
619; CHECK-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
620; CHECK-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
621; CHECK-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
622; CHECK-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
623; CHECK-NEXT:    [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
624; CHECK-NEXT:    [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
625; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
626; CHECK-NEXT:    [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3
627; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1
628; CHECK-NEXT:    [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP7]], i32 2
629; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <4 x double> [[TMP5]], i32 2
630; CHECK-NEXT:    [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP8]], i32 1
631; CHECK-NEXT:    [[TMP9:%.*]] = extractelement <4 x double> [[TMP5]], i32 3
632; CHECK-NEXT:    [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP9]], i32 0
633; CHECK-NEXT:    ret <4 x double> [[I4]]
634;
635; ZEROTHRESH-LABEL: @multi_tree(
636; ZEROTHRESH-NEXT:  entry:
637; ZEROTHRESH-NEXT:    [[TMP0:%.*]] = insertelement <4 x double> undef, double %w, i32 0
638; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = insertelement <4 x double> [[TMP0]], double %x, i32 1
639; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = insertelement <4 x double> [[TMP1]], double %y, i32 2
640; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = insertelement <4 x double> [[TMP2]], double %z, i32 3
641; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = fadd <4 x double> [[TMP3]], <double 0.000000e+00, double 1.000000e+00, double 2.000000e+00, double 3.000000e+00>
642; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = fmul <4 x double> <double 1.000000e+00, double 1.000000e+00, double 1.000000e+00, double 1.000000e+00>, [[TMP4]]
643; ZEROTHRESH-NEXT:    [[TMP6:%.*]] = extractelement <4 x double> [[TMP5]], i32 0
644; ZEROTHRESH-NEXT:    [[I1:%.*]] = insertelement <4 x double> undef, double [[TMP6]], i32 3
645; ZEROTHRESH-NEXT:    [[TMP7:%.*]] = extractelement <4 x double> [[TMP5]], i32 1
646; ZEROTHRESH-NEXT:    [[I2:%.*]] = insertelement <4 x double> [[I1]], double [[TMP7]], i32 2
647; ZEROTHRESH-NEXT:    [[TMP8:%.*]] = extractelement <4 x double> [[TMP5]], i32 2
648; ZEROTHRESH-NEXT:    [[I3:%.*]] = insertelement <4 x double> [[I2]], double [[TMP8]], i32 1
649; ZEROTHRESH-NEXT:    [[TMP9:%.*]] = extractelement <4 x double> [[TMP5]], i32 3
650; ZEROTHRESH-NEXT:    [[I4:%.*]] = insertelement <4 x double> [[I3]], double [[TMP9]], i32 0
651; ZEROTHRESH-NEXT:    ret <4 x double> [[I4]]
652;
653entry:
654  %t0 = fadd double %w , 0.000000e+00
655  %t1 = fadd double %x , 1.000000e+00
656  %t2 = fadd double %y , 2.000000e+00
657  %t3 = fadd double %z , 3.000000e+00
658  %t4 = fmul double %t0, 1.000000e+00
659  %i1 = insertelement <4 x double> undef, double %t4, i32 3
660  %t5 = fmul double %t1, 1.000000e+00
661  %i2 = insertelement <4 x double> %i1, double %t5, i32 2
662  %t6 = fmul double %t2, 1.000000e+00
663  %i3 = insertelement <4 x double> %i2, double %t6, i32 1
664  %t7 = fmul double %t3, 1.000000e+00
665  %i4 = insertelement <4 x double> %i3, double %t7, i32 0
666  ret <4 x double> %i4
667}
668
; Lane-wise fadd of two <8 x float> values, written as eight scalar
; extractelement/fadd groups followed by an insertelement chain that rebuilds
; the result in lane order. Both opt invocations at the top of the file expect
; the eight scalar fadds to become a single <8 x float> fadd of %a and %b.
669define <8 x float> @_vadd256(<8 x float> %a, <8 x float> %b) local_unnamed_addr #0 {
670; CHECK-LABEL: @_vadd256(
671; CHECK-NEXT:  entry:
672; CHECK-NEXT:    [[TMP0:%.*]] = fadd <8 x float> %a, %b
673; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
674; CHECK-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
675; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
676; CHECK-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP2]], i32 1
677; CHECK-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP0]], i32 2
678; CHECK-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[TMP3]], i32 2
679; CHECK-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[TMP0]], i32 3
680; CHECK-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP4]], i32 3
681; CHECK-NEXT:    [[TMP5:%.*]] = extractelement <8 x float> [[TMP0]], i32 4
682; CHECK-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP5]], i32 4
683; CHECK-NEXT:    [[TMP6:%.*]] = extractelement <8 x float> [[TMP0]], i32 5
684; CHECK-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP6]], i32 5
685; CHECK-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> [[TMP0]], i32 6
686; CHECK-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP7]], i32 6
687; CHECK-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[TMP0]], i32 7
688; CHECK-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP8]], i32 7
689; CHECK-NEXT:    ret <8 x float> [[VECINIT7_I]]
690;
691; ZEROTHRESH-LABEL: @_vadd256(
692; ZEROTHRESH-NEXT:  entry:
693; ZEROTHRESH-NEXT:    [[TMP0:%.*]] = fadd <8 x float> %a, %b
694; ZEROTHRESH-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[TMP0]], i32 0
695; ZEROTHRESH-NEXT:    [[VECINIT_I:%.*]] = insertelement <8 x float> undef, float [[TMP1]], i32 0
696; ZEROTHRESH-NEXT:    [[TMP2:%.*]] = extractelement <8 x float> [[TMP0]], i32 1
697; ZEROTHRESH-NEXT:    [[VECINIT1_I:%.*]] = insertelement <8 x float> [[VECINIT_I]], float [[TMP2]], i32 1
698; ZEROTHRESH-NEXT:    [[TMP3:%.*]] = extractelement <8 x float> [[TMP0]], i32 2
699; ZEROTHRESH-NEXT:    [[VECINIT2_I:%.*]] = insertelement <8 x float> [[VECINIT1_I]], float [[TMP3]], i32 2
700; ZEROTHRESH-NEXT:    [[TMP4:%.*]] = extractelement <8 x float> [[TMP0]], i32 3
701; ZEROTHRESH-NEXT:    [[VECINIT3_I:%.*]] = insertelement <8 x float> [[VECINIT2_I]], float [[TMP4]], i32 3
702; ZEROTHRESH-NEXT:    [[TMP5:%.*]] = extractelement <8 x float> [[TMP0]], i32 4
703; ZEROTHRESH-NEXT:    [[VECINIT4_I:%.*]] = insertelement <8 x float> [[VECINIT3_I]], float [[TMP5]], i32 4
704; ZEROTHRESH-NEXT:    [[TMP6:%.*]] = extractelement <8 x float> [[TMP0]], i32 5
705; ZEROTHRESH-NEXT:    [[VECINIT5_I:%.*]] = insertelement <8 x float> [[VECINIT4_I]], float [[TMP6]], i32 5
706; ZEROTHRESH-NEXT:    [[TMP7:%.*]] = extractelement <8 x float> [[TMP0]], i32 6
707; ZEROTHRESH-NEXT:    [[VECINIT6_I:%.*]] = insertelement <8 x float> [[VECINIT5_I]], float [[TMP7]], i32 6
708; ZEROTHRESH-NEXT:    [[TMP8:%.*]] = extractelement <8 x float> [[TMP0]], i32 7
709; ZEROTHRESH-NEXT:    [[VECINIT7_I:%.*]] = insertelement <8 x float> [[VECINIT6_I]], float [[TMP8]], i32 7
710; ZEROTHRESH-NEXT:    ret <8 x float> [[VECINIT7_I]]
711;
712  entry:
713  %vecext = extractelement <8 x float> %a, i32 0
714  %vecext1 = extractelement <8 x float> %b, i32 0
715  %add = fadd float %vecext, %vecext1
716  %vecext2 = extractelement <8 x float> %a, i32 1
717  %vecext3 = extractelement <8 x float> %b, i32 1
718  %add4 = fadd float %vecext2, %vecext3
719  %vecext5 = extractelement <8 x float> %a, i32 2
720  %vecext6 = extractelement <8 x float> %b, i32 2
721  %add7 = fadd float %vecext5, %vecext6
722  %vecext8 = extractelement <8 x float> %a, i32 3
723  %vecext9 = extractelement <8 x float> %b, i32 3
724  %add10 = fadd float %vecext8, %vecext9
725  %vecext11 = extractelement <8 x float> %a, i32 4
726  %vecext12 = extractelement <8 x float> %b, i32 4
727  %add13 = fadd float %vecext11, %vecext12
728  %vecext14 = extractelement <8 x float> %a, i32 5
729  %vecext15 = extractelement <8 x float> %b, i32 5
730  %add16 = fadd float %vecext14, %vecext15
731  %vecext17 = extractelement <8 x float> %a, i32 6
732  %vecext18 = extractelement <8 x float> %b, i32 6
733  %add19 = fadd float %vecext17, %vecext18
734  %vecext20 = extractelement <8 x float> %a, i32 7
735  %vecext21 = extractelement <8 x float> %b, i32 7
736  %add22 = fadd float %vecext20, %vecext21
737  %vecinit.i = insertelement <8 x float> undef, float %add, i32 0
738  %vecinit1.i = insertelement <8 x float> %vecinit.i, float %add4, i32 1
739  %vecinit2.i = insertelement <8 x float> %vecinit1.i, float %add7, i32 2
740  %vecinit3.i = insertelement <8 x float> %vecinit2.i, float %add10, i32 3
741  %vecinit4.i = insertelement <8 x float> %vecinit3.i, float %add13, i32 4
742  %vecinit5.i = insertelement <8 x float> %vecinit4.i, float %add16, i32 5
743  %vecinit6.i = insertelement <8 x float> %vecinit5.i, float %add19, i32 6
744  %vecinit7.i = insertelement <8 x float> %vecinit6.i, float %add22, i32 7
745  ret <8 x float> %vecinit7.i
746}
747
; Attribute group #0, referenced by function definitions in this file (for
; example @simple_select and @_vadd256).
748attributes #0 = { nounwind ssp uwtable "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
749