1; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2; RUN: opt -O2 -expand-reductions -mattr=avx -S < %s | FileCheck %s
3
4; Test if SLP vector reduction patterns are recognized
5; and optionally converted to reduction intrinsics and
6; back to raw IR.
7
; Module-level target description. The triple names x86-64 with the vendor and
; OS fields left empty; the datalayout string declares little-endian ("e"),
; ELF-style mangling ("m:e"), native integer widths of 8/16/32/64 bits and a
; 128-bit natural stack alignment ("S128").
8target triple = "x86_64--"
9target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
10
; Integer add reduction over 4 x i32.
; Input: a scalar counted loop (i = 0..3) that accumulates p[i] into %r.0,
; starting from 0.
; Expected output (assertions below): one <4 x i32> load of %p, then two
; shuffle+add halving steps, with the result read from lane 0.
11define i32 @add_v4i32(i32* %p) #0 {
12; CHECK-LABEL: @add_v4i32(
13; CHECK-NEXT:  entry:
14; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
15; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0:![0-9]+]]
16; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
17; CHECK-NEXT:    [[BIN_RDX:%.*]] = add <4 x i32> [[TMP1]], [[RDX_SHUF]]
18; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[BIN_RDX]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
19; CHECK-NEXT:    [[BIN_RDX4:%.*]] = add <4 x i32> [[BIN_RDX]], [[RDX_SHUF3]]
20; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[BIN_RDX4]], i32 0
21; CHECK-NEXT:    ret i32 [[TMP2]]
22;
23entry:
24  br label %for.cond
25
; Loop header: %r.0 is the running sum, %i.0 the induction variable.
26for.cond:
27  %r.0 = phi i32 [ 0, %entry ], [ %add, %for.inc ]
28  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
29  %cmp = icmp slt i32 %i.0, 4
30  br i1 %cmp, label %for.body, label %for.cond.cleanup
31
32for.cond.cleanup:
33  br label %for.end
34
; Loop body: load p[i] and add it to the accumulator.
35for.body:
36  %idxprom = sext i32 %i.0 to i64
37  %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
38  %0 = load i32, i32* %arrayidx, align 4, !tbaa !3
39  %add = add nsw i32 %r.0, %0
40  br label %for.inc
41
42for.inc:
43  %inc = add nsw i32 %i.0, 1
44  br label %for.cond
45
46for.end:
47  ret i32 %r.0
48}
49
; Integer multiply reduction over 8 x i16.
; Input: a scalar counted loop (i = 0..7); each iteration sign-extends both the
; loaded element and the i16 accumulator to i32, multiplies, and truncates the
; product back to i16. The accumulator starts at 1.
; Expected output (assertions below): one <8 x i16> load, three shuffle+mul
; halving steps performed directly in i16, result read from lane 0.
50define signext i16 @mul_v8i16(i16* %p) #0 {
51; CHECK-LABEL: @mul_v8i16(
52; CHECK-NEXT:  entry:
53; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i16* [[P:%.*]] to <8 x i16>*
54; CHECK-NEXT:    [[TMP1:%.*]] = load <8 x i16>, <8 x i16>* [[TMP0]], align 2, !tbaa [[TBAA4:![0-9]+]]
55; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x i16> [[TMP1]], <8 x i16> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
56; CHECK-NEXT:    [[BIN_RDX:%.*]] = mul <8 x i16> [[TMP1]], [[RDX_SHUF]]
57; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <8 x i16> [[BIN_RDX]], <8 x i16> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
58; CHECK-NEXT:    [[BIN_RDX4:%.*]] = mul <8 x i16> [[BIN_RDX]], [[RDX_SHUF3]]
59; CHECK-NEXT:    [[RDX_SHUF5:%.*]] = shufflevector <8 x i16> [[BIN_RDX4]], <8 x i16> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
60; CHECK-NEXT:    [[BIN_RDX6:%.*]] = mul <8 x i16> [[BIN_RDX4]], [[RDX_SHUF5]]
61; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <8 x i16> [[BIN_RDX6]], i32 0
62; CHECK-NEXT:    ret i16 [[TMP2]]
63;
64entry:
65  br label %for.cond
66
; Loop header: %r.0 is the running product (initial value 1).
67for.cond:
68  %r.0 = phi i16 [ 1, %entry ], [ %conv2, %for.inc ]
69  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
70  %cmp = icmp slt i32 %i.0, 8
71  br i1 %cmp, label %for.body, label %for.cond.cleanup
72
73for.cond.cleanup:
74  br label %for.end
75
; Loop body: widen both operands to i32, multiply, narrow back to i16.
76for.body:
77  %idxprom = sext i32 %i.0 to i64
78  %arrayidx = getelementptr inbounds i16, i16* %p, i64 %idxprom
79  %0 = load i16, i16* %arrayidx, align 2, !tbaa !7
80  %conv = sext i16 %0 to i32
81  %conv1 = sext i16 %r.0 to i32
82  %mul = mul nsw i32 %conv1, %conv
83  %conv2 = trunc i32 %mul to i16
84  br label %for.inc
85
86for.inc:
87  %inc = add nsw i32 %i.0, 1
88  br label %for.cond
89
90for.end:
91  ret i16 %r.0
92}
93
; Bitwise-or reduction over 16 x i8.
; Input: a scalar counted loop (i = 0..15); each iteration sign-extends the
; loaded byte and the i8 accumulator to i32, ORs them, and truncates back to
; i8. The accumulator starts at 0.
; Expected output (assertions below): one <16 x i8> load, four shuffle+or
; halving steps performed directly in i8, result read from lane 0.
94define signext i8 @or_v16i8(i8* %p) #0 {
95; CHECK-LABEL: @or_v16i8(
96; CHECK-NEXT:  entry:
97; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i8* [[P:%.*]] to <16 x i8>*
98; CHECK-NEXT:    [[TMP1:%.*]] = load <16 x i8>, <16 x i8>* [[TMP0]], align 1, !tbaa [[TBAA6:![0-9]+]]
99; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <16 x i8> [[TMP1]], <16 x i8> poison, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
100; CHECK-NEXT:    [[BIN_RDX:%.*]] = or <16 x i8> [[TMP1]], [[RDX_SHUF]]
101; CHECK-NEXT:    [[RDX_SHUF4:%.*]] = shufflevector <16 x i8> [[BIN_RDX]], <16 x i8> poison, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
102; CHECK-NEXT:    [[BIN_RDX5:%.*]] = or <16 x i8> [[BIN_RDX]], [[RDX_SHUF4]]
103; CHECK-NEXT:    [[RDX_SHUF6:%.*]] = shufflevector <16 x i8> [[BIN_RDX5]], <16 x i8> poison, <16 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
104; CHECK-NEXT:    [[BIN_RDX7:%.*]] = or <16 x i8> [[BIN_RDX5]], [[RDX_SHUF6]]
105; CHECK-NEXT:    [[RDX_SHUF8:%.*]] = shufflevector <16 x i8> [[BIN_RDX7]], <16 x i8> poison, <16 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
106; CHECK-NEXT:    [[BIN_RDX9:%.*]] = or <16 x i8> [[BIN_RDX7]], [[RDX_SHUF8]]
107; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <16 x i8> [[BIN_RDX9]], i32 0
108; CHECK-NEXT:    ret i8 [[TMP2]]
109;
110entry:
111  br label %for.cond
112
; Loop header: %r.0 is the running OR (initial value 0).
113for.cond:
114  %r.0 = phi i8 [ 0, %entry ], [ %conv2, %for.inc ]
115  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
116  %cmp = icmp slt i32 %i.0, 16
117  br i1 %cmp, label %for.body, label %for.cond.cleanup
118
119for.cond.cleanup:
120  br label %for.end
121
; Loop body: widen both operands to i32, OR, narrow back to i8.
122for.body:
123  %idxprom = sext i32 %i.0 to i64
124  %arrayidx = getelementptr inbounds i8, i8* %p, i64 %idxprom
125  %0 = load i8, i8* %arrayidx, align 1, !tbaa !9
126  %conv = sext i8 %0 to i32
127  %conv1 = sext i8 %r.0 to i32
128  %or = or i32 %conv1, %conv
129  %conv2 = trunc i32 %or to i8
130  br label %for.inc
131
132for.inc:
133  %inc = add nsw i32 %i.0, 1
134  br label %for.cond
135
136for.end:
137  ret i8 %r.0
138}
139
; Signed-minimum reduction over 4 x i32.
; Input: a scalar counted loop (i = 0..3) that keeps the smaller of p[i] and
; the accumulator using an explicit branch diamond (cond.true / cond.false).
; The accumulator starts at 2147483647.
; Expected output (assertions below): one <4 x i32> load, two halving steps
; each made of shuffle + icmp slt + select, result read from lane 0.
140define i32 @smin_v4i32(i32* %p) #0 {
141; CHECK-LABEL: @smin_v4i32(
142; CHECK-NEXT:  entry:
143; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
144; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0]]
145; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
146; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp slt <4 x i32> [[TMP1]], [[RDX_SHUF]]
147; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
148; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
149; CHECK-NEXT:    [[RDX_MINMAX_CMP4:%.*]] = icmp slt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
150; CHECK-NEXT:    [[RDX_MINMAX_SELECT5:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP4]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF3]]
151; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT5]], i32 0
152; CHECK-NEXT:    ret i32 [[TMP2]]
153;
154entry:
155  br label %for.cond
156
; Loop header: %r.0 is the running minimum.
157for.cond:
158  %r.0 = phi i32 [ 2147483647, %entry ], [ %cond, %for.inc ]
159  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
160  %cmp = icmp slt i32 %i.0, 4
161  br i1 %cmp, label %for.body, label %for.cond.cleanup
162
163for.cond.cleanup:
164  br label %for.end
165
; Loop body: compare p[i] against the running minimum and branch on the result.
166for.body:
167  %idxprom = sext i32 %i.0 to i64
168  %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
169  %0 = load i32, i32* %arrayidx, align 4, !tbaa !3
170  %cmp1 = icmp slt i32 %0, %r.0
171  br i1 %cmp1, label %cond.true, label %cond.false
172
; Taken when p[i] is smaller: the same element p[i] is loaded a second time.
173cond.true:
174  %idxprom2 = sext i32 %i.0 to i64
175  %arrayidx3 = getelementptr inbounds i32, i32* %p, i64 %idxprom2
176  %1 = load i32, i32* %arrayidx3, align 4, !tbaa !3
177  br label %cond.end
178
179cond.false:
180  br label %cond.end
181
; Merge: new minimum is either the reloaded element or the old accumulator.
182cond.end:
183  %cond = phi i32 [ %1, %cond.true ], [ %r.0, %cond.false ]
184  br label %for.inc
185
186for.inc:
187  %inc = add nsw i32 %i.0, 1
188  br label %for.cond
189
190for.end:
191  ret i32 %r.0
192}
193
; Unsigned-maximum reduction over 4 x i32.
; Input: a scalar counted loop (i = 0..3) that keeps the larger (unsigned) of
; p[i] and the accumulator using an explicit branch diamond. The accumulator
; starts at 0.
; Expected output (assertions below): one <4 x i32> load, two halving steps
; each made of shuffle + icmp ugt + select, result read from lane 0.
194define i32 @umax_v4i32(i32* %p) #0 {
195; CHECK-LABEL: @umax_v4i32(
196; CHECK-NEXT:  entry:
197; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[P:%.*]] to <4 x i32>*
198; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x i32>, <4 x i32>* [[TMP0]], align 4, !tbaa [[TBAA0]]
199; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x i32> [[TMP1]], <4 x i32> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
200; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = icmp ugt <4 x i32> [[TMP1]], [[RDX_SHUF]]
201; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP]], <4 x i32> [[TMP1]], <4 x i32> [[RDX_SHUF]]
202; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
203; CHECK-NEXT:    [[RDX_MINMAX_CMP4:%.*]] = icmp ugt <4 x i32> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
204; CHECK-NEXT:    [[RDX_MINMAX_SELECT5:%.*]] = select <4 x i1> [[RDX_MINMAX_CMP4]], <4 x i32> [[RDX_MINMAX_SELECT]], <4 x i32> [[RDX_SHUF3]]
205; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x i32> [[RDX_MINMAX_SELECT5]], i32 0
206; CHECK-NEXT:    ret i32 [[TMP2]]
207;
208entry:
209  br label %for.cond
210
; Loop header: %r.0 is the running maximum.
211for.cond:
212  %r.0 = phi i32 [ 0, %entry ], [ %cond, %for.inc ]
213  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
214  %cmp = icmp slt i32 %i.0, 4
215  br i1 %cmp, label %for.body, label %for.cond.cleanup
216
217for.cond.cleanup:
218  br label %for.end
219
; Loop body: unsigned compare of p[i] against the running maximum.
220for.body:
221  %idxprom = sext i32 %i.0 to i64
222  %arrayidx = getelementptr inbounds i32, i32* %p, i64 %idxprom
223  %0 = load i32, i32* %arrayidx, align 4, !tbaa !3
224  %cmp1 = icmp ugt i32 %0, %r.0
225  br i1 %cmp1, label %cond.true, label %cond.false
226
; Taken when p[i] is larger: the same element p[i] is loaded a second time.
227cond.true:
228  %idxprom2 = sext i32 %i.0 to i64
229  %arrayidx3 = getelementptr inbounds i32, i32* %p, i64 %idxprom2
230  %1 = load i32, i32* %arrayidx3, align 4, !tbaa !3
231  br label %cond.end
232
233cond.false:
234  br label %cond.end
235
; Merge: new maximum is either the reloaded element or the old accumulator.
236cond.end:
237  %cond = phi i32 [ %1, %cond.true ], [ %r.0, %cond.false ]
238  br label %for.inc
239
240for.inc:
241  %inc = add nsw i32 %i.0, 1
242  br label %for.cond
243
244for.end:
245  ret i32 %r.0
246}
247
; Fast-math floating-point add reduction over 4 x float.
; (Despite the "v4i32" suffix in the name, the element type here is float.)
; Input: a scalar counted loop (i = 0..3) that accumulates p[i] with
; 'fadd fast', starting from 42.0.
; Expected output (assertions below): one <4 x float> load, two shuffle +
; 'fadd fast' halving steps, lane 0 extracted, and the 42.0 start value added
; to the extracted scalar in a final scalar 'fadd fast'.
248define float @fadd_v4i32(float* %p) #0 {
249; CHECK-LABEL: @fadd_v4i32(
250; CHECK-NEXT:  entry:
251; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
252; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7:![0-9]+]]
253; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
254; CHECK-NEXT:    [[BIN_RDX:%.*]] = fadd fast <4 x float> [[TMP1]], [[RDX_SHUF]]
255; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
256; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fadd fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]]
257; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0
258; CHECK-NEXT:    [[BIN_RDX5:%.*]] = fadd fast float 4.200000e+01, [[TMP2]]
259; CHECK-NEXT:    ret float [[BIN_RDX5]]
260;
261entry:
262  br label %for.cond
263
; Loop header: %r.0 is the running sum (initial value 42.0).
264for.cond:
265  %r.0 = phi float [ 4.200000e+01, %entry ], [ %add, %for.inc ]
266  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
267  %cmp = icmp slt i32 %i.0, 4
268  br i1 %cmp, label %for.body, label %for.cond.cleanup
269
270for.cond.cleanup:
271  br label %for.end
272
; Loop body: load p[i] and add it to the accumulator with fast-math flags.
273for.body:
274  %idxprom = sext i32 %i.0 to i64
275  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
276  %0 = load float, float* %arrayidx, align 4, !tbaa !10
277  %add = fadd fast float %r.0, %0
278  br label %for.inc
279
280for.inc:
281  %inc = add nsw i32 %i.0, 1
282  br label %for.cond
283
284for.end:
285  ret float %r.0
286}
287
; Fast-math floating-point multiply reduction over 4 x float.
; (Despite the "v4i32" suffix in the name, the element type here is float.)
; Input: a scalar counted loop (i = 0..3) that accumulates p[i] with
; 'fmul fast', starting from 42.0.
; Expected output (assertions below): one <4 x float> load, two shuffle +
; 'fmul fast' halving steps, lane 0 extracted, a scalar multiply by 1.0, and
; then a separate scalar multiply by the 42.0 start value.
288define float @fmul_v4i32(float* %p) #0 {
289; CHECK-LABEL: @fmul_v4i32(
290; CHECK-NEXT:  entry:
291; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
292; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7]]
293; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
294; CHECK-NEXT:    [[BIN_RDX:%.*]] = fmul fast <4 x float> [[TMP1]], [[RDX_SHUF]]
295; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[BIN_RDX]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
296; CHECK-NEXT:    [[BIN_RDX4:%.*]] = fmul fast <4 x float> [[BIN_RDX]], [[RDX_SHUF3]]
297; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[BIN_RDX4]], i32 0
298; CHECK-NEXT:    [[BIN_RDX5:%.*]] = fmul fast float 1.000000e+00, [[TMP2]]
299; CHECK-NEXT:    [[OP_EXTRA:%.*]] = fmul fast float [[BIN_RDX5]], 4.200000e+01
300; CHECK-NEXT:    ret float [[OP_EXTRA]]
301;
302entry:
303  br label %for.cond
304
; Loop header: %r.0 is the running product (initial value 42.0).
305for.cond:
306  %r.0 = phi float [ 4.200000e+01, %entry ], [ %mul, %for.inc ]
307  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
308  %cmp = icmp slt i32 %i.0, 4
309  br i1 %cmp, label %for.body, label %for.cond.cleanup
310
311for.cond.cleanup:
312  br label %for.end
313
; Loop body: load p[i] and multiply it into the accumulator with fast-math flags.
314for.body:
315  %idxprom = sext i32 %i.0 to i64
316  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
317  %0 = load float, float* %arrayidx, align 4, !tbaa !10
318  %mul = fmul fast float %r.0, %0
319  br label %for.inc
320
321for.inc:
322  %inc = add nsw i32 %i.0, 1
323  br label %for.cond
324
325for.end:
326  ret float %r.0
327}
328
; Fast-math floating-point minimum reduction over 4 x float.
; Input: a scalar counted loop (i = 0..3) that keeps the smaller of p[i] and
; the accumulator using 'fcmp fast olt' and an explicit branch diamond. The
; accumulator starts at 0x47EFFFFFE0000000 (the largest finite float value).
; Expected output (assertions below): one <4 x float> load, two halving steps
; each made of shuffle + 'fcmp fast olt' + 'select fast', result read from
; lane 0.
329define float @fmin_v4f32(float* %p) #0 {
330; CHECK-LABEL: @fmin_v4f32(
331; CHECK-NEXT:  entry:
332; CHECK-NEXT:    [[TMP0:%.*]] = bitcast float* [[P:%.*]] to <4 x float>*
333; CHECK-NEXT:    [[TMP1:%.*]] = load <4 x float>, <4 x float>* [[TMP0]], align 4, !tbaa [[TBAA7]]
334; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <4 x float> [[TMP1]], <4 x float> poison, <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
335; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp fast olt <4 x float> [[TMP1]], [[RDX_SHUF]]
336; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP]], <4 x float> [[TMP1]], <4 x float> [[RDX_SHUF]]
337; CHECK-NEXT:    [[RDX_SHUF3:%.*]] = shufflevector <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> poison, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
338; CHECK-NEXT:    [[RDX_MINMAX_CMP4:%.*]] = fcmp fast olt <4 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF3]]
339; CHECK-NEXT:    [[RDX_MINMAX_SELECT5:%.*]] = select fast <4 x i1> [[RDX_MINMAX_CMP4]], <4 x float> [[RDX_MINMAX_SELECT]], <4 x float> [[RDX_SHUF3]]
340; CHECK-NEXT:    [[TMP2:%.*]] = extractelement <4 x float> [[RDX_MINMAX_SELECT5]], i32 0
341; CHECK-NEXT:    ret float [[TMP2]]
342;
343entry:
344  br label %for.cond
345
; Loop header: %r.0 is the running minimum.
346for.cond:
347  %r.0 = phi float [  0x47EFFFFFE0000000, %entry ], [ %cond, %for.inc ]
348  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
349  %cmp = icmp slt i32 %i.0, 4
350  br i1 %cmp, label %for.body, label %for.cond.cleanup
351
352for.cond.cleanup:
353  br label %for.end
354
; Loop body: ordered less-than compare of p[i] against the running minimum.
355for.body:
356  %idxprom = sext i32 %i.0 to i64
357  %arrayidx = getelementptr inbounds float, float* %p, i64 %idxprom
358  %0 = load float, float* %arrayidx, align 4, !tbaa !10
359  %cmp1 = fcmp fast olt float %0, %r.0
360  br i1 %cmp1, label %cond.true, label %cond.false
361
; Taken when p[i] is smaller: the same element p[i] is loaded a second time.
362cond.true:
363  %idxprom2 = sext i32 %i.0 to i64
364  %arrayidx3 = getelementptr inbounds float, float* %p, i64 %idxprom2
365  %1 = load float, float* %arrayidx3, align 4, !tbaa !10
366  br label %cond.end
367
368cond.false:
369  br label %cond.end
370
; Merge: the phi itself carries the 'fast' flags.
371cond.end:
372  %cond = phi fast float [ %1, %cond.true ], [ %r.0, %cond.false ]
373  br label %for.inc
374
375for.inc:
376  %inc = add nsw i32 %i.0, 1
377  br label %for.cond
378
379for.end:
380  ret float %r.0
381}
382
; Scalar helper called by @findMax: returns the greater of %a and %b.
; Both arguments are spilled to stack slots and reloaded; the comparison is
; 'fcmp nnan ninf nsz ogt' and the result is chosen through a branch diamond
; and a phi carrying the same fast-math flags.
; The function has available_externally linkage, and no assertions are
; attached to it in this file.
383define available_externally float @max(float %a, float %b) {
384entry:
385  %a.addr = alloca float, align 4
386  %b.addr = alloca float, align 4
387  store float %a, float* %a.addr, align 4
388  store float %b, float* %b.addr, align 4
389  %0 = load float, float* %a.addr, align 4
390  %1 = load float, float* %b.addr, align 4
391  %cmp = fcmp nnan ninf nsz ogt float %0, %1
392  br i1 %cmp, label %cond.true, label %cond.false
393
394cond.true:                                        ; preds = %entry
395  %2 = load float, float* %a.addr, align 4
396  br label %cond.end
397
398cond.false:                                       ; preds = %entry
399  %3 = load float, float* %b.addr, align 4
400  br label %cond.end
401
402cond.end:                                         ; preds = %cond.false, %cond.true
403  %cond = phi nnan ninf nsz float [ %2, %cond.true ], [ %3, %cond.false ]
404  ret float %cond
405}
406
407; PR23116
408
; Floating-point maximum over the 8 lanes of a <8 x float> passed byval.
; Input: straight-line code (no loop) that copies the vector into a local
; alloca, then for each lane 0..7 reloads the whole vector, extracts one
; element, and chains it through a call to @max with nnan ninf nsz flags.
; Expected output (assertions below): a single <8 x float> load from the
; argument, three halving steps each made of shuffle + 'fcmp nnan ninf nsz ogt'
; + 'select nnan ninf nsz', result read from lane 0. No call to @max and no
; alloca appear in the expected output.
; NOTE(review): the vector loads/stores below are tagged '!tbaa !3', which in
; this file's metadata is the "int" access tag even though the data is float.
; The assertions are generated against that, so it is left alone; confirm
; whether it is intentional.
409define float @findMax(<8 x float>* byval(<8 x float>) align 16 %0) {
410; CHECK-LABEL: @findMax(
411; CHECK-NEXT:  entry:
412; CHECK-NEXT:    [[V:%.*]] = load <8 x float>, <8 x float>* [[TMP0:%.*]], align 16, !tbaa [[TBAA0]]
413; CHECK-NEXT:    [[RDX_SHUF:%.*]] = shufflevector <8 x float> [[V]], <8 x float> poison, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
414; CHECK-NEXT:    [[RDX_MINMAX_CMP:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[V]], [[RDX_SHUF]]
415; CHECK-NEXT:    [[RDX_MINMAX_SELECT:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP]], <8 x float> [[V]], <8 x float> [[RDX_SHUF]]
416; CHECK-NEXT:    [[RDX_SHUF8:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> poison, <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
417; CHECK-NEXT:    [[RDX_MINMAX_CMP9:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[RDX_MINMAX_SELECT]], [[RDX_SHUF8]]
418; CHECK-NEXT:    [[RDX_MINMAX_SELECT10:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP9]], <8 x float> [[RDX_MINMAX_SELECT]], <8 x float> [[RDX_SHUF8]]
419; CHECK-NEXT:    [[RDX_SHUF11:%.*]] = shufflevector <8 x float> [[RDX_MINMAX_SELECT10]], <8 x float> poison, <8 x i32> <i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
420; CHECK-NEXT:    [[RDX_MINMAX_CMP12:%.*]] = fcmp nnan ninf nsz ogt <8 x float> [[RDX_MINMAX_SELECT10]], [[RDX_SHUF11]]
421; CHECK-NEXT:    [[RDX_MINMAX_SELECT13:%.*]] = select nnan ninf nsz <8 x i1> [[RDX_MINMAX_CMP12]], <8 x float> [[RDX_MINMAX_SELECT10]], <8 x float> [[RDX_SHUF11]]
422; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <8 x float> [[RDX_MINMAX_SELECT13]], i32 0
423; CHECK-NEXT:    ret float [[TMP1]]
424;
425entry:
  ; Copy the byval argument into a local, 32-byte-aligned stack slot.
426  %v.addr = alloca <8 x float>, align 32
427  %v = load <8 x float>, <8 x float>* %0, align 16, !tbaa !3
428  store <8 x float> %v, <8 x float>* %v.addr, align 32, !tbaa !3
  ; Lanes 0 and 1 seed the chain; each later lane is folded in via @max.
429  %1 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
430  %vecext = extractelement <8 x float> %1, i32 0
431  %2 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
432  %vecext1 = extractelement <8 x float> %2, i32 1
433  %call = call nnan ninf nsz float @max(float %vecext, float %vecext1)
434  %3 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
435  %vecext2 = extractelement <8 x float> %3, i32 2
436  %call3 = call nnan ninf nsz float @max(float %call, float %vecext2)
437  %4 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
438  %vecext4 = extractelement <8 x float> %4, i32 3
439  %call5 = call nnan ninf nsz float @max(float %call3, float %vecext4)
440  %5 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
441  %vecext6 = extractelement <8 x float> %5, i32 4
442  %call7 = call nnan ninf nsz float @max(float %call5, float %vecext6)
443  %6 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
444  %vecext8 = extractelement <8 x float> %6, i32 5
445  %call9 = call nnan ninf nsz float @max(float %call7, float %vecext8)
446  %7 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
447  %vecext10 = extractelement <8 x float> %7, i32 6
448  %call11 = call nnan ninf nsz float @max(float %call9, float %vecext10)
449  %8 = load <8 x float>, <8 x float>* %v.addr, align 32, !tbaa !3
450  %vecext12 = extractelement <8 x float> %8, i32 7
451  %call13 = call nnan ninf nsz float @max(float %call11, float %vecext12)
452  ret float %call13
453}
454
; Function attribute group #0, attached to the eight loop-based reduction
; functions in this file (@max and @findMax do not use it). It sets the
; function-level fast-math string attributes ("no-infs-fp-math",
; "no-nans-fp-math", "no-signed-zeros-fp-math", "unsafe-fp-math") to "true" and
; names "penryn" as target-cpu with "+avx" among the target-features.
455attributes #0 = { nounwind ssp uwtable "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "frame-pointer"="all" "less-precise-fpmad"="false" "min-legal-vector-width"="0" "no-infs-fp-math"="true" "no-jump-tables"="false" "no-nans-fp-math"="true" "no-signed-zeros-fp-math"="true" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="penryn" "target-features"="+avx,+cx16,+cx8,+fxsr,+mmx,+popcnt,+sahf,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3,+x87,+xsave" "unsafe-fp-math"="true" "use-soft-float"="false" }
456
; Metadata nodes.
; !0-!2 have the shape of module-flag / compiler-ident entries, but nothing in
; the visible part of this file references them.
457!0 = !{i32 1, !"wchar_size", i32 4}
458!1 = !{i32 7, !"PIC Level", i32 2}
459!2 = !{!"clang version 11.0.0 (https://github.com/llvm/llvm-project.git a9fe69c359de653015c39e413e48630d069abe27)"}
; TBAA access tags used by the loads/stores above, and their type nodes:
;   !3  -> "int"             (used by the i32 loops and by @findMax)
;   !7  -> "short"           (used by @mul_v8i16)
;   !9  -> "omnipotent char" (used by @or_v16i8)
;   !10 -> "float"           (used by the float loops)
; All scalar type nodes hang off "omnipotent char" (!5), whose parent is the
; "Simple C/C++ TBAA" root (!6).
460!3 = !{!4, !4, i64 0}
461!4 = !{!"int", !5, i64 0}
462!5 = !{!"omnipotent char", !6, i64 0}
463!6 = !{!"Simple C/C++ TBAA"}
464!7 = !{!8, !8, i64 0}
465!8 = !{!"short", !5, i64 0}
466!9 = !{!5, !5, i64 0}
467!10 = !{!11, !11, i64 0}
468!11 = !{!"float", !5, i64 0}
469