1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -slp-vectorizer -S < %s -mtriple=x86_64-unknown-linux -mcpu=corei7-avx -verify | FileCheck %s -check-prefix=ENABLED
3;
4; Without supernode operand reordering, this does not get fully vectorized.
5; S[0] = (A[0] + B[0]) + C[0]
6; S[1] = (B[1] + C[1]) + A[1]
7define void @test_supernode_add(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
8; ENABLED-LABEL: @test_supernode_add(
9; ENABLED-NEXT:  entry:
10; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
11; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
12; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
13; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
14; ENABLED-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
15; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
16; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
17; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
18; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
19; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
20; ENABLED-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
21; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
22; ENABLED-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
23; ENABLED-NEXT:    [[C1:%.*]] = load double, double* [[IDXC1]], align 8
24; ENABLED-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
25; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1
26; ENABLED-NEXT:    [[TMP4:%.*]] = fadd fast <2 x double> [[TMP3]], [[TMP1]]
27; ENABLED-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
28; ENABLED-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A1]], i32 1
29; ENABLED-NEXT:    [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
30; ENABLED-NEXT:    [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
31; ENABLED-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
32; ENABLED-NEXT:    ret void
33;
; Two-lane chain of fast-math adds over A, B and C, stored to S[0] and S[1]:
;   lane 0: (A[0] + B[0]) + C[0]
;   lane 1: (B[1] + C[1]) + A[1]
; The A/B/C operands sit in different positions in the two lanes.
; The autogenerated checks above expect B[0..1] to be loaded as one
; <2 x double>, <A0, C1> and <C0, A1> to be built with insertelement, two
; vector fadds, and a single <2 x double> store to S.
34entry:
; Addresses of elements 0 and 1 of each array.
35  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
36  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
37  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
38  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
39  %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
40  %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
41  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
42  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
43
44  %A0 = load double, double *%idxA0, align 8
45  %A1 = load double, double *%idxA1, align 8
46
47  %B0 = load double, double *%idxB0, align 8
48  %B1 = load double, double *%idxB1, align 8
49
50  %C0 = load double, double *%idxC0, align 8
51  %C1 = load double, double *%idxC1, align 8
52
53  %addA0B0 = fadd fast double %A0, %B0 ; lane 0, inner add: A[0] + B[0]
54  %addB1C1 = fadd fast double %B1, %C1 ; lane 1, inner add: B[1] + C[1]
55  %add0 = fadd fast double %addA0B0, %C0 ; lane 0, outer add: ... + C[0]
56  %add1 = fadd fast double %addB1C1, %A1 ; lane 1, outer add: ... + A[1]
; Adjacent stores to S[0] and S[1].
57  store double %add0, double *%idxS0, align 8
58  store double %add1, double *%idxS1, align 8
59  ret void
60}
61
62
63; Without supernode operand reordering, this does not get fully vectorized.
64; S[0] = (A[0] - B[0]) + C[0]
65; S[1] = (C[1] - B[1]) + A[1]
66define void @test_supernode_addsub(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
67; ENABLED-LABEL: @test_supernode_addsub(
68; ENABLED-NEXT:  entry:
69; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
70; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
71; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
72; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
73; ENABLED-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
74; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
75; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
76; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
77; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
78; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
79; ENABLED-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXB0]] to <2 x double>*
80; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
81; ENABLED-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
82; ENABLED-NEXT:    [[C1:%.*]] = load double, double* [[IDXC1]], align 8
83; ENABLED-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
84; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[C1]], i32 1
85; ENABLED-NEXT:    [[TMP4:%.*]] = fsub fast <2 x double> [[TMP3]], [[TMP1]]
86; ENABLED-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[C0]], i32 0
87; ENABLED-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[A1]], i32 1
88; ENABLED-NEXT:    [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
89; ENABLED-NEXT:    [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
90; ENABLED-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
91; ENABLED-NEXT:    ret void
92;
; Two-lane fast-math chain (fsub followed by fadd), stored to S[0] and S[1]:
;   lane 0: (A[0] - B[0]) + C[0]
;   lane 1: (C[1] - B[1]) + A[1]
; B is the subtrahend in both lanes; A and C swap roles between the lanes.
; The autogenerated checks above expect B[0..1] to be loaded as one
; <2 x double>, <A0, C1> - <B0, B1> as a vector fsub, then a vector fadd with
; <C0, A1>, and a single <2 x double> store to S.
93entry:
; Addresses of elements 0 and 1 of each array.
94  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
95  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
96  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
97  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
98  %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
99  %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
100  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
101  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
102
103  %A0 = load double, double *%idxA0, align 8
104  %A1 = load double, double *%idxA1, align 8
105
106  %B0 = load double, double *%idxB0, align 8
107  %B1 = load double, double *%idxB1, align 8
108
109  %C0 = load double, double *%idxC0, align 8
110  %C1 = load double, double *%idxC1, align 8
111
112  %subA0B0 = fsub fast double %A0, %B0 ; lane 0, inner sub: A[0] - B[0]
113  %subC1B1 = fsub fast double %C1, %B1 ; lane 1, inner sub: C[1] - B[1]
114  %add0 = fadd fast double %subA0B0, %C0 ; lane 0, outer add: ... + C[0]
115  %add1 = fadd fast double %subC1B1, %A1 ; lane 1, outer add: ... + A[1]
; Adjacent stores to S[0] and S[1].
116  store double %add0, double *%idxS0, align 8
117  store double %add1, double *%idxS1, align 8
118  ret void
119}
120
121; Without supernode operand reordering, this does not get fully vectorized.
122; This checks that the super-node works with alternate sequences.
123;
124; S[0] = (A[0] - B[0]) - C[0]
125; S[1] = (B[1] + C[1]) + A[1]
126define void @test_supernode_addsub_alt(double* %Aarray, double* %Barray, double *%Carray, double *%Sarray) {
127; ENABLED-LABEL: @test_supernode_addsub_alt(
128; ENABLED-NEXT:  entry:
129; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
130; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
131; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
132; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
133; ENABLED-NEXT:    [[IDXC0:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
134; ENABLED-NEXT:    [[IDXC1:%.*]] = getelementptr inbounds double, double* [[CARRAY]], i64 1
135; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
136; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
137; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
138; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
139; ENABLED-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
140; ENABLED-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
141; ENABLED-NEXT:    [[C0:%.*]] = load double, double* [[IDXC0]], align 8
142; ENABLED-NEXT:    [[C1:%.*]] = load double, double* [[IDXC1]], align 8
143; ENABLED-NEXT:    [[SUBA0B0:%.*]] = fsub fast double [[A0]], [[B0]]
144; ENABLED-NEXT:    [[ADDB1C1:%.*]] = fadd fast double [[B1]], [[C1]]
145; ENABLED-NEXT:    [[SUB0:%.*]] = fsub fast double [[SUBA0B0]], [[C0]]
146; ENABLED-NEXT:    [[ADD1:%.*]] = fadd fast double [[ADDB1C1]], [[A1]]
147; ENABLED-NEXT:    store double [[SUB0]], double* [[IDXS0]], align 8
148; ENABLED-NEXT:    store double [[ADD1]], double* [[IDXS1]], align 8
149; ENABLED-NEXT:    ret void
150;
; Two-lane fast-math chain whose lanes use different opcodes at both levels,
; stored to S[0] and S[1]:
;   lane 0: (A[0] - B[0]) - C[0]   (fsub, fsub)
;   lane 1: (B[1] + C[1]) + A[1]   (fadd, fadd)
; The autogenerated checks above expect every load, arithmetic operation and
; store to remain scalar, i.e. the output has no vector instructions.
; NOTE(review): the comment preceding this function says the super-node works
; with alternate sequences, yet the checked output is unvectorized - confirm
; that this is the intended expectation.
151entry:
; Addresses of elements 0 and 1 of each array.
152  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
153  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
154  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
155  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
156  %idxC0 = getelementptr inbounds double, double* %Carray, i64 0
157  %idxC1 = getelementptr inbounds double, double* %Carray, i64 1
158  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
159  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
160
161  %A0 = load double, double *%idxA0, align 8
162  %A1 = load double, double *%idxA1, align 8
163
164  %B0 = load double, double *%idxB0, align 8
165  %B1 = load double, double *%idxB1, align 8
166
167  %C0 = load double, double *%idxC0, align 8
168  %C1 = load double, double *%idxC1, align 8
169
170  %subA0B0 = fsub fast double %A0, %B0 ; lane 0, inner sub: A[0] - B[0]
171  %addB1C1 = fadd fast double %B1, %C1 ; lane 1, inner add: B[1] + C[1]
172  %sub0 = fsub fast double %subA0B0, %C0 ; lane 0, outer sub: ... - C[0]
173  %add1 = fadd fast double %addB1C1, %A1 ; lane 1, outer add: ... + A[1]
; Adjacent stores to S[0] and S[1].
174  store double %sub0, double *%idxS0, align 8
175  store double %add1, double *%idxS1, align 8
176  ret void
177}
178
179; This checks that vectorizeTree() works correctly with the supernode
180; and does not generate uses before defs.
181; If all of the operands of the supernode are vectorizable, then the scheduler
182; will fix their position in the program. If not, then the scheduler may not
183; touch them, leading to uses before defs.
184;
185; A0 = ...
186; C = ...
187; t1 = A0 + C
188; B0 = ...
189; t2 = t1 + B0
190; A1 = ...
191; B1 = ...
192; t3 = A1 + B1
193; D = ...
194; t4 = t3 + D
195;
196;
197;  A0  C   A1  B1              A0  C    A1  D            A0:1  C,D
198;   \ /      \ /    Reorder      \ /      \ /    Bundles     \ /
199; t1 + B0  t3 + D   ------->   t1 + B0  t3 + B1  ------> t1:3 + B0:1
200;    |/       |/                  |/       |/                 |/
201; t2 +     t4 +                t2 +     t4 +             t2:4 +
202;
203; After reordering, 'D' conceptually becomes an operand of t3:
204; t3 = A1 + D
205; But D is defined *after* its use.
206;
207define void @supernode_scheduling(double* %Aarray, double* %Barray, double *%Carray, double *%Darray, double *%Sarray) {
208; ENABLED-LABEL: @supernode_scheduling(
209; ENABLED-NEXT:  entry:
210; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
211; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
212; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
213; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
214; ENABLED-NEXT:    [[IDXC:%.*]] = getelementptr inbounds double, double* [[CARRAY:%.*]], i64 0
215; ENABLED-NEXT:    [[IDXD:%.*]] = getelementptr inbounds double, double* [[DARRAY:%.*]], i64 0
216; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
217; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
218; ENABLED-NEXT:    [[C:%.*]] = load double, double* [[IDXC]], align 8
219; ENABLED-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
220; ENABLED-NEXT:    [[TMP0:%.*]] = bitcast double* [[IDXA0]] to <2 x double>*
221; ENABLED-NEXT:    [[TMP1:%.*]] = load <2 x double>, <2 x double>* [[TMP0]], align 8
222; ENABLED-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
223; ENABLED-NEXT:    [[TMP2:%.*]] = insertelement <2 x double> poison, double [[C]], i32 0
224; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> [[TMP2]], double [[B1]], i32 1
225; ENABLED-NEXT:    [[TMP4:%.*]] = fadd fast <2 x double> [[TMP1]], [[TMP3]]
226; ENABLED-NEXT:    [[D:%.*]] = load double, double* [[IDXD]], align 8
227; ENABLED-NEXT:    [[TMP5:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
228; ENABLED-NEXT:    [[TMP6:%.*]] = insertelement <2 x double> [[TMP5]], double [[D]], i32 1
229; ENABLED-NEXT:    [[TMP7:%.*]] = fadd fast <2 x double> [[TMP4]], [[TMP6]]
230; ENABLED-NEXT:    [[TMP8:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
231; ENABLED-NEXT:    store <2 x double> [[TMP7]], <2 x double>* [[TMP8]], align 8
232; ENABLED-NEXT:    ret void
233;
; Two fast-math add chains with loads interleaved between the adds, stored to
; S[0] and S[1]:
;   lane 0: t2 = (A[0] + C) + B[0]
;   lane 1: t4 = (A[1] + B[1]) + D
; The position of each load relative to the adds is the point of this test, so
; the instruction order below must be kept as is.
; The autogenerated checks above expect A[0..1] to be loaded as one
; <2 x double>, a vector fadd with <C, B1>, then the scalar load of D, then a
; vector fadd with <B0, D>, and a single <2 x double> store to S. In that
; output every value is defined before it is used.
234entry:
; Addresses of A[0], A[1], B[0], B[1], C[0], D[0], S[0] and S[1].
235  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
236  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
237  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
238  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
239  %idxC = getelementptr inbounds double, double* %Carray, i64 0
240  %idxD = getelementptr inbounds double, double* %Darray, i64 0
241  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
242  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
243
244
245  %A0 = load double, double *%idxA0, align 8
246  %C = load double, double *%idxC, align 8
247  %t1 = fadd fast double %A0, %C ; lane 0, inner add: A[0] + C
248  %B0 = load double, double *%idxB0, align 8
249  %t2 = fadd fast double %t1, %B0 ; lane 0, outer add: t1 + B[0]
250  %A1 = load double, double *%idxA1, align 8
251  %B1 = load double, double *%idxB1, align 8
252  %t3 = fadd fast double %A1, %B1 ; lane 1, inner add: A[1] + B[1]
253  %D = load double, double *%idxD, align 8 ; D is loaded only after t3 has been computed
254  %t4 = fadd fast double %t3, %D ; lane 1, outer add: t3 + D
255
; Adjacent stores to S[0] and S[1].
256  store double %t2, double *%idxS0, align 8
257  store double %t4, double *%idxS1, align 8
258  ret void
259}
260
261
262; The SLP scheduler has trouble moving instructions across blocks.
263; Even though we can build a SuperNode for this example, we should not because the scheduler
264; cannot handle the cross-block instruction motion that is required once the operands of the
265; SuperNode are reordered.
266;
267; entry:
268;  A0 = ...
269;  B1 = ...
270;  Tmp0 = A0 + 2.0
271;  Tmp1 = B1 + 2.0
272;
273; bb:
274;  A1 = ...
275;  B0 = ...
276;  S[0] = Tmp0 + B0
277;  S[1] = Tmp1 + A1
278define void @supernode_scheduling_cross_block(double* %Aarray, double* %Barray, double *%Sarray) {
279; ENABLED-LABEL: @supernode_scheduling_cross_block(
280; ENABLED-NEXT:  entry:
281; ENABLED-NEXT:    [[IDXA0:%.*]] = getelementptr inbounds double, double* [[AARRAY:%.*]], i64 0
282; ENABLED-NEXT:    [[IDXA1:%.*]] = getelementptr inbounds double, double* [[AARRAY]], i64 1
283; ENABLED-NEXT:    [[IDXB0:%.*]] = getelementptr inbounds double, double* [[BARRAY:%.*]], i64 0
284; ENABLED-NEXT:    [[IDXB1:%.*]] = getelementptr inbounds double, double* [[BARRAY]], i64 1
285; ENABLED-NEXT:    [[IDXS0:%.*]] = getelementptr inbounds double, double* [[SARRAY:%.*]], i64 0
286; ENABLED-NEXT:    [[IDXS1:%.*]] = getelementptr inbounds double, double* [[SARRAY]], i64 1
287; ENABLED-NEXT:    [[A0:%.*]] = load double, double* [[IDXA0]], align 8
288; ENABLED-NEXT:    [[B1:%.*]] = load double, double* [[IDXB1]], align 8
289; ENABLED-NEXT:    [[TMP0:%.*]] = insertelement <2 x double> poison, double [[A0]], i32 0
290; ENABLED-NEXT:    [[TMP1:%.*]] = insertelement <2 x double> [[TMP0]], double [[B1]], i32 1
291; ENABLED-NEXT:    [[TMP2:%.*]] = fadd fast <2 x double> [[TMP1]], <double 2.000000e+00, double 2.000000e+00>
292; ENABLED-NEXT:    br label [[BB:%.*]]
293; ENABLED:       bb:
294; ENABLED-NEXT:    [[A1:%.*]] = load double, double* [[IDXA1]], align 8
295; ENABLED-NEXT:    [[B0:%.*]] = load double, double* [[IDXB0]], align 8
296; ENABLED-NEXT:    [[TMP3:%.*]] = insertelement <2 x double> poison, double [[B0]], i32 0
297; ENABLED-NEXT:    [[TMP4:%.*]] = insertelement <2 x double> [[TMP3]], double [[A1]], i32 1
298; ENABLED-NEXT:    [[TMP5:%.*]] = fadd fast <2 x double> [[TMP2]], [[TMP4]]
299; ENABLED-NEXT:    [[TMP6:%.*]] = bitcast double* [[IDXS0]] to <2 x double>*
300; ENABLED-NEXT:    store <2 x double> [[TMP5]], <2 x double>* [[TMP6]], align 8
301; ENABLED-NEXT:    ret void
302;
; Two-lane fast-math add chain split across two basic blocks, stored to S[0]
; and S[1]:
;   lane 0: (A[0] + 2.0) + B[0]
;   lane 1: (B[1] + 2.0) + A[1]
; The inner adds and their loads are in the entry block; the outer adds, their
; loads and the stores are in block bb.
; The autogenerated checks above expect <A0, B1> + <2.0, 2.0> as a vector fadd
; that stays in the entry block, and in bb the scalar loads of A1 and B0,
; a vector fadd with <B0, A1>, and a single <2 x double> store to S.
303entry:
; Addresses of elements 0 and 1 of A, B and S.
304  %idxA0 = getelementptr inbounds double, double* %Aarray, i64 0
305  %idxA1 = getelementptr inbounds double, double* %Aarray, i64 1
306  %idxB0 = getelementptr inbounds double, double* %Barray, i64 0
307  %idxB1 = getelementptr inbounds double, double* %Barray, i64 1
308  %idxS0 = getelementptr inbounds double, double* %Sarray, i64 0
309  %idxS1 = getelementptr inbounds double, double* %Sarray, i64 1
310
311  %A0 = load double, double *%idxA0, align 8
312  %B1 = load double, double *%idxB1, align 8
313  %Tmp0 = fadd fast double %A0, 2.0 ; lane 0, inner add: A[0] + 2.0
314  %Tmp1 = fadd fast double %B1, 2.0 ; lane 1, inner add: B[1] + 2.0
315br label %bb
316
317bb:
; The remaining operands are loaded in this block, not in entry.
318  %A1 = load double, double *%idxA1, align 8
319  %B0 = load double, double *%idxB0, align 8
320
321  %Sum0 = fadd fast double %Tmp0, %B0 ; lane 0, outer add: Tmp0 + B[0]
322  %Sum1 = fadd fast double %Tmp1, %A1 ; lane 1, outer add: Tmp1 + A[1]
323
; Adjacent stores to S[0] and S[1].
324  store double %Sum0, double *%idxS0, align 8
325  store double %Sum1, double *%idxS1, align 8
326  ret void
327}
328