1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
3; RUN:   -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
4; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
5; RUN:   -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
6; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
7; RUN:   -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64le < %s | \
8; RUN:   FileCheck %s --check-prefix=PWR10LE
9; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
10; RUN:   -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64 < %s | \
11; RUN:   FileCheck %s --check-prefix=PWR10BE
12
13;;
14;; Vectors of f32
15;;
;; v2f32: reduce a <2 x float> with llvm.vector.reduce.fmax, called without
;; fast-math flags. For every run configuration the expected output converts
;; two lanes with xscvspdpn and combines them with a single xsmaxdp. The LE-
;; and BE-prefixed checks differ in the shuffles used to reach the lanes and
;; in the operand order of xsmaxdp.
16define dso_local float @v2f32(<2 x float> %a) local_unnamed_addr #0 {
17; PWR9LE-LABEL: v2f32:
18; PWR9LE:       # %bb.0: # %entry
19; PWR9LE-NEXT:    xxswapd vs0, v2
20; PWR9LE-NEXT:    xxsldwi vs1, v2, v2, 3
21; PWR9LE-NEXT:    xscvspdpn f0, vs0
22; PWR9LE-NEXT:    xscvspdpn f1, vs1
23; PWR9LE-NEXT:    xsmaxdp f1, f1, f0
24; PWR9LE-NEXT:    blr
25;
26; PWR9BE-LABEL: v2f32:
27; PWR9BE:       # %bb.0: # %entry
28; PWR9BE-NEXT:    xxsldwi vs1, v2, v2, 1
29; PWR9BE-NEXT:    xscvspdpn f0, v2
30; PWR9BE-NEXT:    xscvspdpn f1, vs1
31; PWR9BE-NEXT:    xsmaxdp f1, f0, f1
32; PWR9BE-NEXT:    blr
33;
34; PWR10LE-LABEL: v2f32:
35; PWR10LE:       # %bb.0: # %entry
36; PWR10LE-NEXT:    xxswapd vs0, v2
37; PWR10LE-NEXT:    xxsldwi vs1, v2, v2, 3
38; PWR10LE-NEXT:    xscvspdpn f0, vs0
39; PWR10LE-NEXT:    xscvspdpn f1, vs1
40; PWR10LE-NEXT:    xsmaxdp f1, f1, f0
41; PWR10LE-NEXT:    blr
42;
43; PWR10BE-LABEL: v2f32:
44; PWR10BE:       # %bb.0: # %entry
45; PWR10BE-NEXT:    xxsldwi vs1, v2, v2, 1
46; PWR10BE-NEXT:    xscvspdpn f0, v2
47; PWR10BE-NEXT:    xscvspdpn f1, vs1
48; PWR10BE-NEXT:    xsmaxdp f1, f0, f1
49; PWR10BE-NEXT:    blr
50entry:
51  %0 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a)
52  ret float %0
53}
54
;; v2f32_fast: same reduction as v2f32, but the intrinsic call carries the
;; `fast` flag. The expected output has no xsmaxdp: it splats a word with
;; xxspltw, applies one xvmaxsp, and converts the result with xscvspdpn. The
;; LE-prefixed checks contain one extra xxsldwi before the conversion.
55define dso_local float @v2f32_fast(<2 x float> %a) local_unnamed_addr #0 {
56; PWR9LE-LABEL: v2f32_fast:
57; PWR9LE:       # %bb.0: # %entry
58; PWR9LE-NEXT:    xxspltw vs0, v2, 2
59; PWR9LE-NEXT:    xvmaxsp vs0, v2, vs0
60; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
61; PWR9LE-NEXT:    xscvspdpn f1, vs0
62; PWR9LE-NEXT:    blr
63;
64; PWR9BE-LABEL: v2f32_fast:
65; PWR9BE:       # %bb.0: # %entry
66; PWR9BE-NEXT:    xxspltw vs0, v2, 1
67; PWR9BE-NEXT:    xvmaxsp vs0, v2, vs0
68; PWR9BE-NEXT:    xscvspdpn f1, vs0
69; PWR9BE-NEXT:    blr
70;
71; PWR10LE-LABEL: v2f32_fast:
72; PWR10LE:       # %bb.0: # %entry
73; PWR10LE-NEXT:    xxspltw vs0, v2, 2
74; PWR10LE-NEXT:    xvmaxsp vs0, v2, vs0
75; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
76; PWR10LE-NEXT:    xscvspdpn f1, vs0
77; PWR10LE-NEXT:    blr
78;
79; PWR10BE-LABEL: v2f32_fast:
80; PWR10BE:       # %bb.0: # %entry
81; PWR10BE-NEXT:    xxspltw vs0, v2, 1
82; PWR10BE-NEXT:    xvmaxsp vs0, v2, vs0
83; PWR10BE-NEXT:    xscvspdpn f1, vs0
84; PWR10BE-NEXT:    blr
85entry:
86  %0 = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a)
87  ret float %0
88}
89
;; v4f32: reduce a <4 x float> with llvm.vector.reduce.fmax, called without
;; fast-math flags. The expected output converts four lanes with xscvspdpn and
;; chains three xsmaxdp instructions. For a given endianness the pwr9 and
;; pwr10 checks list the same instructions, only in a slightly different
;; order.
90define dso_local float @v4f32(<4 x float> %a) local_unnamed_addr #0 {
91; PWR9LE-LABEL: v4f32:
92; PWR9LE:       # %bb.0: # %entry
93; PWR9LE-NEXT:    xxsldwi vs2, v2, v2, 3
94; PWR9LE-NEXT:    xxswapd vs3, v2
95; PWR9LE-NEXT:    xscvspdpn f0, v2
96; PWR9LE-NEXT:    xscvspdpn f2, vs2
97; PWR9LE-NEXT:    xxsldwi vs1, v2, v2, 1
98; PWR9LE-NEXT:    xscvspdpn f3, vs3
99; PWR9LE-NEXT:    xscvspdpn f1, vs1
100; PWR9LE-NEXT:    xsmaxdp f2, f2, f3
101; PWR9LE-NEXT:    xsmaxdp f1, f2, f1
102; PWR9LE-NEXT:    xsmaxdp f1, f1, f0
103; PWR9LE-NEXT:    blr
104;
105; PWR9BE-LABEL: v4f32:
106; PWR9BE:       # %bb.0: # %entry
107; PWR9BE-NEXT:    xxsldwi vs2, v2, v2, 1
108; PWR9BE-NEXT:    xxswapd vs1, v2
109; PWR9BE-NEXT:    xscvspdpn f3, v2
110; PWR9BE-NEXT:    xscvspdpn f2, vs2
111; PWR9BE-NEXT:    xxsldwi vs0, v2, v2, 3
112; PWR9BE-NEXT:    xscvspdpn f1, vs1
113; PWR9BE-NEXT:    xscvspdpn f0, vs0
114; PWR9BE-NEXT:    xsmaxdp f2, f3, f2
115; PWR9BE-NEXT:    xsmaxdp f1, f2, f1
116; PWR9BE-NEXT:    xsmaxdp f1, f1, f0
117; PWR9BE-NEXT:    blr
118;
119; PWR10LE-LABEL: v4f32:
120; PWR10LE:       # %bb.0: # %entry
121; PWR10LE-NEXT:    xxsldwi vs2, v2, v2, 3
122; PWR10LE-NEXT:    xxswapd vs3, v2
123; PWR10LE-NEXT:    xxsldwi vs1, v2, v2, 1
124; PWR10LE-NEXT:    xscvspdpn f0, v2
125; PWR10LE-NEXT:    xscvspdpn f2, vs2
126; PWR10LE-NEXT:    xscvspdpn f3, vs3
127; PWR10LE-NEXT:    xscvspdpn f1, vs1
128; PWR10LE-NEXT:    xsmaxdp f2, f2, f3
129; PWR10LE-NEXT:    xsmaxdp f1, f2, f1
130; PWR10LE-NEXT:    xsmaxdp f1, f1, f0
131; PWR10LE-NEXT:    blr
132;
133; PWR10BE-LABEL: v4f32:
134; PWR10BE:       # %bb.0: # %entry
135; PWR10BE-NEXT:    xxsldwi vs2, v2, v2, 1
136; PWR10BE-NEXT:    xxswapd vs1, v2
137; PWR10BE-NEXT:    xscvspdpn f3, v2
138; PWR10BE-NEXT:    xxsldwi vs0, v2, v2, 3
139; PWR10BE-NEXT:    xscvspdpn f2, vs2
140; PWR10BE-NEXT:    xscvspdpn f1, vs1
141; PWR10BE-NEXT:    xscvspdpn f0, vs0
142; PWR10BE-NEXT:    xsmaxdp f2, f3, f2
143; PWR10BE-NEXT:    xsmaxdp f1, f2, f1
144; PWR10BE-NEXT:    xsmaxdp f1, f1, f0
145; PWR10BE-NEXT:    blr
146entry:
147  %0 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
148  ret float %0
149}
150
;; v4f32_fast: same reduction as v4f32, but the intrinsic call carries the
;; `fast` flag. The expected output uses two xvmaxsp steps (after xxswapd and
;; after xxspltw) and one final xscvspdpn, with no xsmaxdp. The LE-prefixed
;; checks contain one extra xxsldwi before the conversion.
151define dso_local float @v4f32_fast(<4 x float> %a) local_unnamed_addr #0 {
152; PWR9LE-LABEL: v4f32_fast:
153; PWR9LE:       # %bb.0: # %entry
154; PWR9LE-NEXT:    xxswapd v3, v2
155; PWR9LE-NEXT:    xvmaxsp vs0, v2, v3
156; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
157; PWR9LE-NEXT:    xvmaxsp vs0, vs0, vs1
158; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
159; PWR9LE-NEXT:    xscvspdpn f1, vs0
160; PWR9LE-NEXT:    blr
161;
162; PWR9BE-LABEL: v4f32_fast:
163; PWR9BE:       # %bb.0: # %entry
164; PWR9BE-NEXT:    xxswapd v3, v2
165; PWR9BE-NEXT:    xvmaxsp vs0, v2, v3
166; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
167; PWR9BE-NEXT:    xvmaxsp vs0, vs0, vs1
168; PWR9BE-NEXT:    xscvspdpn f1, vs0
169; PWR9BE-NEXT:    blr
170;
171; PWR10LE-LABEL: v4f32_fast:
172; PWR10LE:       # %bb.0: # %entry
173; PWR10LE-NEXT:    xxswapd v3, v2
174; PWR10LE-NEXT:    xvmaxsp vs0, v2, v3
175; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
176; PWR10LE-NEXT:    xvmaxsp vs0, vs0, vs1
177; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
178; PWR10LE-NEXT:    xscvspdpn f1, vs0
179; PWR10LE-NEXT:    blr
180;
181; PWR10BE-LABEL: v4f32_fast:
182; PWR10BE:       # %bb.0: # %entry
183; PWR10BE-NEXT:    xxswapd v3, v2
184; PWR10BE-NEXT:    xvmaxsp vs0, v2, v3
185; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
186; PWR10BE-NEXT:    xvmaxsp vs0, vs0, vs1
187; PWR10BE-NEXT:    xscvspdpn f1, vs0
188; PWR10BE-NEXT:    blr
189entry:
190  %0 = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a)
191  ret float %0
192}
193
;; v8f32: reduce an <8 x float> with llvm.vector.reduce.fmax, called without
;; fast-math flags. The expected output first combines the two argument
;; registers (v2, v3) with one xvmaxsp, then converts four lanes of the result
;; with xscvspdpn and chains three xsmaxdp instructions.
194define dso_local float @v8f32(<8 x float> %a) local_unnamed_addr #0 {
195; PWR9LE-LABEL: v8f32:
196; PWR9LE:       # %bb.0: # %entry
197; PWR9LE-NEXT:    xvmaxsp vs0, v2, v3
198; PWR9LE-NEXT:    xxswapd vs1, vs0
199; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
200; PWR9LE-NEXT:    xscvspdpn f1, vs1
201; PWR9LE-NEXT:    xscvspdpn f2, vs2
202; PWR9LE-NEXT:    xsmaxdp f1, f2, f1
203; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
204; PWR9LE-NEXT:    xscvspdpn f0, vs0
205; PWR9LE-NEXT:    xscvspdpn f2, vs2
206; PWR9LE-NEXT:    xsmaxdp f1, f1, f2
207; PWR9LE-NEXT:    xsmaxdp f1, f1, f0
208; PWR9LE-NEXT:    blr
209;
210; PWR9BE-LABEL: v8f32:
211; PWR9BE:       # %bb.0: # %entry
212; PWR9BE-NEXT:    xvmaxsp vs0, v2, v3
213; PWR9BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
214; PWR9BE-NEXT:    xscvspdpn f1, vs0
215; PWR9BE-NEXT:    xscvspdpn f2, vs2
216; PWR9BE-NEXT:    xsmaxdp f1, f1, f2
217; PWR9BE-NEXT:    xxswapd vs2, vs0
218; PWR9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
219; PWR9BE-NEXT:    xscvspdpn f2, vs2
220; PWR9BE-NEXT:    xscvspdpn f0, vs0
221; PWR9BE-NEXT:    xsmaxdp f1, f1, f2
222; PWR9BE-NEXT:    xsmaxdp f1, f1, f0
223; PWR9BE-NEXT:    blr
224;
225; PWR10LE-LABEL: v8f32:
226; PWR10LE:       # %bb.0: # %entry
227; PWR10LE-NEXT:    xvmaxsp vs0, v2, v3
228; PWR10LE-NEXT:    xxswapd vs1, vs0
229; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
230; PWR10LE-NEXT:    xscvspdpn f1, vs1
231; PWR10LE-NEXT:    xscvspdpn f2, vs2
232; PWR10LE-NEXT:    xsmaxdp f1, f2, f1
233; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
234; PWR10LE-NEXT:    xscvspdpn f0, vs0
235; PWR10LE-NEXT:    xscvspdpn f2, vs2
236; PWR10LE-NEXT:    xsmaxdp f1, f1, f2
237; PWR10LE-NEXT:    xsmaxdp f1, f1, f0
238; PWR10LE-NEXT:    blr
239;
240; PWR10BE-LABEL: v8f32:
241; PWR10BE:       # %bb.0: # %entry
242; PWR10BE-NEXT:    xvmaxsp vs0, v2, v3
243; PWR10BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
244; PWR10BE-NEXT:    xscvspdpn f1, vs0
245; PWR10BE-NEXT:    xscvspdpn f2, vs2
246; PWR10BE-NEXT:    xsmaxdp f1, f1, f2
247; PWR10BE-NEXT:    xxswapd vs2, vs0
248; PWR10BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
249; PWR10BE-NEXT:    xscvspdpn f2, vs2
250; PWR10BE-NEXT:    xscvspdpn f0, vs0
251; PWR10BE-NEXT:    xsmaxdp f1, f1, f2
252; PWR10BE-NEXT:    xsmaxdp f1, f1, f0
253; PWR10BE-NEXT:    blr
254entry:
255  %0 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %a)
256  ret float %0
257}
258
;; v8f32_fast: same reduction as v8f32, but the intrinsic call carries the
;; `fast` flag. The expected output uses three xvmaxsp instructions (v2 with
;; v3, then after xxswapd, then after xxspltw) and one final xscvspdpn, with
;; no xsmaxdp. The LE-prefixed checks contain one extra xxsldwi before the
;; conversion.
259define dso_local float @v8f32_fast(<8 x float> %a) local_unnamed_addr #0 {
260; PWR9LE-LABEL: v8f32_fast:
261; PWR9LE:       # %bb.0: # %entry
262; PWR9LE-NEXT:    xvmaxsp vs0, v2, v3
263; PWR9LE-NEXT:    xxswapd v2, vs0
264; PWR9LE-NEXT:    xvmaxsp vs0, vs0, v2
265; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
266; PWR9LE-NEXT:    xvmaxsp vs0, vs0, vs1
267; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
268; PWR9LE-NEXT:    xscvspdpn f1, vs0
269; PWR9LE-NEXT:    blr
270;
271; PWR9BE-LABEL: v8f32_fast:
272; PWR9BE:       # %bb.0: # %entry
273; PWR9BE-NEXT:    xvmaxsp vs0, v2, v3
274; PWR9BE-NEXT:    xxswapd v2, vs0
275; PWR9BE-NEXT:    xvmaxsp vs0, vs0, v2
276; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
277; PWR9BE-NEXT:    xvmaxsp vs0, vs0, vs1
278; PWR9BE-NEXT:    xscvspdpn f1, vs0
279; PWR9BE-NEXT:    blr
280;
281; PWR10LE-LABEL: v8f32_fast:
282; PWR10LE:       # %bb.0: # %entry
283; PWR10LE-NEXT:    xvmaxsp vs0, v2, v3
284; PWR10LE-NEXT:    xxswapd v2, vs0
285; PWR10LE-NEXT:    xvmaxsp vs0, vs0, v2
286; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
287; PWR10LE-NEXT:    xvmaxsp vs0, vs0, vs1
288; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
289; PWR10LE-NEXT:    xscvspdpn f1, vs0
290; PWR10LE-NEXT:    blr
291;
292; PWR10BE-LABEL: v8f32_fast:
293; PWR10BE:       # %bb.0: # %entry
294; PWR10BE-NEXT:    xvmaxsp vs0, v2, v3
295; PWR10BE-NEXT:    xxswapd v2, vs0
296; PWR10BE-NEXT:    xvmaxsp vs0, vs0, v2
297; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
298; PWR10BE-NEXT:    xvmaxsp vs0, vs0, vs1
299; PWR10BE-NEXT:    xscvspdpn f1, vs0
300; PWR10BE-NEXT:    blr
301entry:
302  %0 = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %a)
303  ret float %0
304}
305
;; v16f32: reduce a <16 x float> with llvm.vector.reduce.fmax, called without
;; fast-math flags. The expected output combines the four argument registers
;; (v2..v5) with three xvmaxsp instructions, then converts four lanes of the
;; result with xscvspdpn and chains three xsmaxdp instructions.
306define dso_local float @v16f32(<16 x float> %a) local_unnamed_addr #0 {
307; PWR9LE-LABEL: v16f32:
308; PWR9LE:       # %bb.0: # %entry
309; PWR9LE-NEXT:    xvmaxsp vs0, v3, v5
310; PWR9LE-NEXT:    xvmaxsp vs1, v2, v4
311; PWR9LE-NEXT:    xvmaxsp vs0, vs1, vs0
312; PWR9LE-NEXT:    xxswapd vs1, vs0
313; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
314; PWR9LE-NEXT:    xscvspdpn f1, vs1
315; PWR9LE-NEXT:    xscvspdpn f2, vs2
316; PWR9LE-NEXT:    xsmaxdp f1, f2, f1
317; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
318; PWR9LE-NEXT:    xscvspdpn f0, vs0
319; PWR9LE-NEXT:    xscvspdpn f2, vs2
320; PWR9LE-NEXT:    xsmaxdp f1, f1, f2
321; PWR9LE-NEXT:    xsmaxdp f1, f1, f0
322; PWR9LE-NEXT:    blr
323;
324; PWR9BE-LABEL: v16f32:
325; PWR9BE:       # %bb.0: # %entry
326; PWR9BE-NEXT:    xvmaxsp vs0, v3, v5
327; PWR9BE-NEXT:    xvmaxsp vs1, v2, v4
328; PWR9BE-NEXT:    xvmaxsp vs0, vs1, vs0
329; PWR9BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
330; PWR9BE-NEXT:    xscvspdpn f1, vs0
331; PWR9BE-NEXT:    xscvspdpn f2, vs2
332; PWR9BE-NEXT:    xsmaxdp f1, f1, f2
333; PWR9BE-NEXT:    xxswapd vs2, vs0
334; PWR9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
335; PWR9BE-NEXT:    xscvspdpn f2, vs2
336; PWR9BE-NEXT:    xscvspdpn f0, vs0
337; PWR9BE-NEXT:    xsmaxdp f1, f1, f2
338; PWR9BE-NEXT:    xsmaxdp f1, f1, f0
339; PWR9BE-NEXT:    blr
340;
341; PWR10LE-LABEL: v16f32:
342; PWR10LE:       # %bb.0: # %entry
343; PWR10LE-NEXT:    xvmaxsp vs0, v3, v5
344; PWR10LE-NEXT:    xvmaxsp vs1, v2, v4
345; PWR10LE-NEXT:    xvmaxsp vs0, vs1, vs0
346; PWR10LE-NEXT:    xxswapd vs1, vs0
347; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
348; PWR10LE-NEXT:    xscvspdpn f1, vs1
349; PWR10LE-NEXT:    xscvspdpn f2, vs2
350; PWR10LE-NEXT:    xsmaxdp f1, f2, f1
351; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
352; PWR10LE-NEXT:    xscvspdpn f0, vs0
353; PWR10LE-NEXT:    xscvspdpn f2, vs2
354; PWR10LE-NEXT:    xsmaxdp f1, f1, f2
355; PWR10LE-NEXT:    xsmaxdp f1, f1, f0
356; PWR10LE-NEXT:    blr
357;
358; PWR10BE-LABEL: v16f32:
359; PWR10BE:       # %bb.0: # %entry
360; PWR10BE-NEXT:    xvmaxsp vs0, v3, v5
361; PWR10BE-NEXT:    xvmaxsp vs1, v2, v4
362; PWR10BE-NEXT:    xvmaxsp vs0, vs1, vs0
363; PWR10BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
364; PWR10BE-NEXT:    xscvspdpn f1, vs0
365; PWR10BE-NEXT:    xscvspdpn f2, vs2
366; PWR10BE-NEXT:    xsmaxdp f1, f1, f2
367; PWR10BE-NEXT:    xxswapd vs2, vs0
368; PWR10BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
369; PWR10BE-NEXT:    xscvspdpn f2, vs2
370; PWR10BE-NEXT:    xscvspdpn f0, vs0
371; PWR10BE-NEXT:    xsmaxdp f1, f1, f2
372; PWR10BE-NEXT:    xsmaxdp f1, f1, f0
373; PWR10BE-NEXT:    blr
374entry:
375  %0 = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
376  ret float %0
377}
378
;; v16f32_fast: same reduction as v16f32, but the intrinsic call carries the
;; `fast` flag. The expected output uses five xvmaxsp instructions in total
;; (three to combine v2..v5, then one after xxswapd and one after xxspltw) and
;; one final xscvspdpn, with no xsmaxdp. The LE-prefixed checks contain one
;; extra xxsldwi before the conversion.
379define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 {
380; PWR9LE-LABEL: v16f32_fast:
381; PWR9LE:       # %bb.0: # %entry
382; PWR9LE-NEXT:    xvmaxsp vs0, v3, v5
383; PWR9LE-NEXT:    xvmaxsp vs1, v2, v4
384; PWR9LE-NEXT:    xvmaxsp vs0, vs1, vs0
385; PWR9LE-NEXT:    xxswapd v2, vs0
386; PWR9LE-NEXT:    xvmaxsp vs0, vs0, v2
387; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
388; PWR9LE-NEXT:    xvmaxsp vs0, vs0, vs1
389; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
390; PWR9LE-NEXT:    xscvspdpn f1, vs0
391; PWR9LE-NEXT:    blr
392;
393; PWR9BE-LABEL: v16f32_fast:
394; PWR9BE:       # %bb.0: # %entry
395; PWR9BE-NEXT:    xvmaxsp vs0, v3, v5
396; PWR9BE-NEXT:    xvmaxsp vs1, v2, v4
397; PWR9BE-NEXT:    xvmaxsp vs0, vs1, vs0
398; PWR9BE-NEXT:    xxswapd v2, vs0
399; PWR9BE-NEXT:    xvmaxsp vs0, vs0, v2
400; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
401; PWR9BE-NEXT:    xvmaxsp vs0, vs0, vs1
402; PWR9BE-NEXT:    xscvspdpn f1, vs0
403; PWR9BE-NEXT:    blr
404;
405; PWR10LE-LABEL: v16f32_fast:
406; PWR10LE:       # %bb.0: # %entry
407; PWR10LE-NEXT:    xvmaxsp vs0, v3, v5
408; PWR10LE-NEXT:    xvmaxsp vs1, v2, v4
409; PWR10LE-NEXT:    xvmaxsp vs0, vs1, vs0
410; PWR10LE-NEXT:    xxswapd v2, vs0
411; PWR10LE-NEXT:    xvmaxsp vs0, vs0, v2
412; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
413; PWR10LE-NEXT:    xvmaxsp vs0, vs0, vs1
414; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
415; PWR10LE-NEXT:    xscvspdpn f1, vs0
416; PWR10LE-NEXT:    blr
417;
418; PWR10BE-LABEL: v16f32_fast:
419; PWR10BE:       # %bb.0: # %entry
420; PWR10BE-NEXT:    xvmaxsp vs0, v3, v5
421; PWR10BE-NEXT:    xvmaxsp vs1, v2, v4
422; PWR10BE-NEXT:    xvmaxsp vs0, vs1, vs0
423; PWR10BE-NEXT:    xxswapd v2, vs0
424; PWR10BE-NEXT:    xvmaxsp vs0, vs0, v2
425; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
426; PWR10BE-NEXT:    xvmaxsp vs0, vs0, vs1
427; PWR10BE-NEXT:    xscvspdpn f1, vs0
428; PWR10BE-NEXT:    blr
429entry:
430  %0 = call fast float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a)
431  ret float %0
432}
433
;; v32f32: reduce a <32 x float> with llvm.vector.reduce.fmax, called without
;; fast-math flags. The expected output combines the eight argument registers
;; (v2..v9) with seven xvmaxsp instructions, then converts four lanes of the
;; result with xscvspdpn and chains three xsmaxdp instructions.
434define dso_local float @v32f32(<32 x float> %a) local_unnamed_addr #0 {
435; PWR9LE-LABEL: v32f32:
436; PWR9LE:       # %bb.0: # %entry
437; PWR9LE-NEXT:    xvmaxsp vs0, v5, v9
438; PWR9LE-NEXT:    xvmaxsp vs1, v3, v7
439; PWR9LE-NEXT:    xvmaxsp vs2, v2, v6
440; PWR9LE-NEXT:    xvmaxsp vs0, vs1, vs0
441; PWR9LE-NEXT:    xvmaxsp vs1, v4, v8
442; PWR9LE-NEXT:    xvmaxsp vs1, vs2, vs1
443; PWR9LE-NEXT:    xvmaxsp vs0, vs1, vs0
444; PWR9LE-NEXT:    xxswapd vs1, vs0
445; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
446; PWR9LE-NEXT:    xscvspdpn f1, vs1
447; PWR9LE-NEXT:    xscvspdpn f2, vs2
448; PWR9LE-NEXT:    xsmaxdp f1, f2, f1
449; PWR9LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
450; PWR9LE-NEXT:    xscvspdpn f0, vs0
451; PWR9LE-NEXT:    xscvspdpn f2, vs2
452; PWR9LE-NEXT:    xsmaxdp f1, f1, f2
453; PWR9LE-NEXT:    xsmaxdp f1, f1, f0
454; PWR9LE-NEXT:    blr
455;
456; PWR9BE-LABEL: v32f32:
457; PWR9BE:       # %bb.0: # %entry
458; PWR9BE-NEXT:    xvmaxsp vs0, v5, v9
459; PWR9BE-NEXT:    xvmaxsp vs1, v3, v7
460; PWR9BE-NEXT:    xvmaxsp vs2, v2, v6
461; PWR9BE-NEXT:    xvmaxsp vs0, vs1, vs0
462; PWR9BE-NEXT:    xvmaxsp vs1, v4, v8
463; PWR9BE-NEXT:    xvmaxsp vs1, vs2, vs1
464; PWR9BE-NEXT:    xvmaxsp vs0, vs1, vs0
465; PWR9BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
466; PWR9BE-NEXT:    xscvspdpn f1, vs0
467; PWR9BE-NEXT:    xscvspdpn f2, vs2
468; PWR9BE-NEXT:    xsmaxdp f1, f1, f2
469; PWR9BE-NEXT:    xxswapd vs2, vs0
470; PWR9BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
471; PWR9BE-NEXT:    xscvspdpn f2, vs2
472; PWR9BE-NEXT:    xscvspdpn f0, vs0
473; PWR9BE-NEXT:    xsmaxdp f1, f1, f2
474; PWR9BE-NEXT:    xsmaxdp f1, f1, f0
475; PWR9BE-NEXT:    blr
476;
477; PWR10LE-LABEL: v32f32:
478; PWR10LE:       # %bb.0: # %entry
479; PWR10LE-NEXT:    xvmaxsp vs0, v5, v9
480; PWR10LE-NEXT:    xvmaxsp vs1, v3, v7
481; PWR10LE-NEXT:    xvmaxsp vs2, v2, v6
482; PWR10LE-NEXT:    xvmaxsp vs0, vs1, vs0
483; PWR10LE-NEXT:    xvmaxsp vs1, v4, v8
484; PWR10LE-NEXT:    xvmaxsp vs1, vs2, vs1
485; PWR10LE-NEXT:    xvmaxsp vs0, vs1, vs0
486; PWR10LE-NEXT:    xxswapd vs1, vs0
487; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 3
488; PWR10LE-NEXT:    xscvspdpn f1, vs1
489; PWR10LE-NEXT:    xscvspdpn f2, vs2
490; PWR10LE-NEXT:    xsmaxdp f1, f2, f1
491; PWR10LE-NEXT:    xxsldwi vs2, vs0, vs0, 1
492; PWR10LE-NEXT:    xscvspdpn f0, vs0
493; PWR10LE-NEXT:    xscvspdpn f2, vs2
494; PWR10LE-NEXT:    xsmaxdp f1, f1, f2
495; PWR10LE-NEXT:    xsmaxdp f1, f1, f0
496; PWR10LE-NEXT:    blr
497;
498; PWR10BE-LABEL: v32f32:
499; PWR10BE:       # %bb.0: # %entry
500; PWR10BE-NEXT:    xvmaxsp vs0, v5, v9
501; PWR10BE-NEXT:    xvmaxsp vs1, v3, v7
502; PWR10BE-NEXT:    xvmaxsp vs2, v2, v6
503; PWR10BE-NEXT:    xvmaxsp vs0, vs1, vs0
504; PWR10BE-NEXT:    xvmaxsp vs1, v4, v8
505; PWR10BE-NEXT:    xvmaxsp vs1, vs2, vs1
506; PWR10BE-NEXT:    xvmaxsp vs0, vs1, vs0
507; PWR10BE-NEXT:    xxsldwi vs2, vs0, vs0, 1
508; PWR10BE-NEXT:    xscvspdpn f1, vs0
509; PWR10BE-NEXT:    xscvspdpn f2, vs2
510; PWR10BE-NEXT:    xsmaxdp f1, f1, f2
511; PWR10BE-NEXT:    xxswapd vs2, vs0
512; PWR10BE-NEXT:    xxsldwi vs0, vs0, vs0, 3
513; PWR10BE-NEXT:    xscvspdpn f2, vs2
514; PWR10BE-NEXT:    xscvspdpn f0, vs0
515; PWR10BE-NEXT:    xsmaxdp f1, f1, f2
516; PWR10BE-NEXT:    xsmaxdp f1, f1, f0
517; PWR10BE-NEXT:    blr
518entry:
519  %0 = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> %a)
520  ret float %0
521}
522
;; v32f32_fast: same reduction as v32f32, but the intrinsic call carries the
;; `fast` flag. The expected output uses nine xvmaxsp instructions in total
;; (seven to combine v2..v9, then one after xxswapd and one after xxspltw) and
;; one final xscvspdpn, with no xsmaxdp. The LE-prefixed checks contain one
;; extra xxsldwi before the conversion.
523define dso_local float @v32f32_fast(<32 x float> %a) local_unnamed_addr #0 {
524; PWR9LE-LABEL: v32f32_fast:
525; PWR9LE:       # %bb.0: # %entry
526; PWR9LE-NEXT:    xvmaxsp vs0, v4, v8
527; PWR9LE-NEXT:    xvmaxsp vs1, v2, v6
528; PWR9LE-NEXT:    xvmaxsp vs2, v5, v9
529; PWR9LE-NEXT:    xvmaxsp vs3, v3, v7
530; PWR9LE-NEXT:    xvmaxsp vs2, vs3, vs2
531; PWR9LE-NEXT:    xvmaxsp vs0, vs1, vs0
532; PWR9LE-NEXT:    xvmaxsp vs0, vs0, vs2
533; PWR9LE-NEXT:    xxswapd v2, vs0
534; PWR9LE-NEXT:    xvmaxsp vs0, vs0, v2
535; PWR9LE-NEXT:    xxspltw vs1, vs0, 2
536; PWR9LE-NEXT:    xvmaxsp vs0, vs0, vs1
537; PWR9LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
538; PWR9LE-NEXT:    xscvspdpn f1, vs0
539; PWR9LE-NEXT:    blr
540;
541; PWR9BE-LABEL: v32f32_fast:
542; PWR9BE:       # %bb.0: # %entry
543; PWR9BE-NEXT:    xvmaxsp vs0, v4, v8
544; PWR9BE-NEXT:    xvmaxsp vs1, v2, v6
545; PWR9BE-NEXT:    xvmaxsp vs2, v5, v9
546; PWR9BE-NEXT:    xvmaxsp vs3, v3, v7
547; PWR9BE-NEXT:    xvmaxsp vs2, vs3, vs2
548; PWR9BE-NEXT:    xvmaxsp vs0, vs1, vs0
549; PWR9BE-NEXT:    xvmaxsp vs0, vs0, vs2
550; PWR9BE-NEXT:    xxswapd v2, vs0
551; PWR9BE-NEXT:    xvmaxsp vs0, vs0, v2
552; PWR9BE-NEXT:    xxspltw vs1, vs0, 1
553; PWR9BE-NEXT:    xvmaxsp vs0, vs0, vs1
554; PWR9BE-NEXT:    xscvspdpn f1, vs0
555; PWR9BE-NEXT:    blr
556;
557; PWR10LE-LABEL: v32f32_fast:
558; PWR10LE:       # %bb.0: # %entry
559; PWR10LE-NEXT:    xvmaxsp vs0, v4, v8
560; PWR10LE-NEXT:    xvmaxsp vs1, v2, v6
561; PWR10LE-NEXT:    xvmaxsp vs2, v5, v9
562; PWR10LE-NEXT:    xvmaxsp vs3, v3, v7
563; PWR10LE-NEXT:    xvmaxsp vs2, vs3, vs2
564; PWR10LE-NEXT:    xvmaxsp vs0, vs1, vs0
565; PWR10LE-NEXT:    xvmaxsp vs0, vs0, vs2
566; PWR10LE-NEXT:    xxswapd v2, vs0
567; PWR10LE-NEXT:    xvmaxsp vs0, vs0, v2
568; PWR10LE-NEXT:    xxspltw vs1, vs0, 2
569; PWR10LE-NEXT:    xvmaxsp vs0, vs0, vs1
570; PWR10LE-NEXT:    xxsldwi vs0, vs0, vs0, 3
571; PWR10LE-NEXT:    xscvspdpn f1, vs0
572; PWR10LE-NEXT:    blr
573;
574; PWR10BE-LABEL: v32f32_fast:
575; PWR10BE:       # %bb.0: # %entry
576; PWR10BE-NEXT:    xvmaxsp vs0, v4, v8
577; PWR10BE-NEXT:    xvmaxsp vs1, v2, v6
578; PWR10BE-NEXT:    xvmaxsp vs2, v5, v9
579; PWR10BE-NEXT:    xvmaxsp vs3, v3, v7
580; PWR10BE-NEXT:    xvmaxsp vs2, vs3, vs2
581; PWR10BE-NEXT:    xvmaxsp vs0, vs1, vs0
582; PWR10BE-NEXT:    xvmaxsp vs0, vs0, vs2
583; PWR10BE-NEXT:    xxswapd v2, vs0
584; PWR10BE-NEXT:    xvmaxsp vs0, vs0, v2
585; PWR10BE-NEXT:    xxspltw vs1, vs0, 1
586; PWR10BE-NEXT:    xvmaxsp vs0, vs0, vs1
587; PWR10BE-NEXT:    xscvspdpn f1, vs0
588; PWR10BE-NEXT:    blr
589entry:
590  %0 = call fast float @llvm.vector.reduce.fmax.v32f32(<32 x float> %a)
591  ret float %0
592}
593
;; Declarations of the single-precision llvm.vector.reduce.fmax intrinsics
;; called by the f32 tests above, one per vector width (2, 4, 8, 16 and 32
;; elements).
594declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>) #0
595declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) #0
596declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>) #0
597declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>) #0
598declare float @llvm.vector.reduce.fmax.v32f32(<32 x float>) #0
599
600;;
601;; Vectors of f64
602;;
;; v2f64: reduce a <2 x double> with llvm.vector.reduce.fmax, called without
;; fast-math flags. The expected output is one xxswapd followed by a single
;; xsmaxdp; the LE- and BE-prefixed checks differ only in the operand order of
;; xsmaxdp.
603define dso_local double @v2f64(<2 x double> %a) local_unnamed_addr #0 {
604; PWR9LE-LABEL: v2f64:
605; PWR9LE:       # %bb.0: # %entry
606; PWR9LE-NEXT:    xxswapd vs0, v2
607; PWR9LE-NEXT:    xsmaxdp f1, f0, v2
608; PWR9LE-NEXT:    blr
609;
610; PWR9BE-LABEL: v2f64:
611; PWR9BE:       # %bb.0: # %entry
612; PWR9BE-NEXT:    xxswapd vs0, v2
613; PWR9BE-NEXT:    xsmaxdp f1, v2, f0
614; PWR9BE-NEXT:    blr
615;
616; PWR10LE-LABEL: v2f64:
617; PWR10LE:       # %bb.0: # %entry
618; PWR10LE-NEXT:    xxswapd vs0, v2
619; PWR10LE-NEXT:    xsmaxdp f1, f0, v2
620; PWR10LE-NEXT:    blr
621;
622; PWR10BE-LABEL: v2f64:
623; PWR10BE:       # %bb.0: # %entry
624; PWR10BE-NEXT:    xxswapd vs0, v2
625; PWR10BE-NEXT:    xsmaxdp f1, v2, f0
626; PWR10BE-NEXT:    blr
627entry:
628  %0 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
629  ret double %0
630}
631
;; v2f64_fast: same reduction as v2f64, but the intrinsic call carries the
;; `fast` flag. The expected output uses xxswapd plus one xvmaxdp instead of
;; xsmaxdp. The LE-prefixed checks need a second xxswapd to place the result
;; in vs1, while the BE-prefixed checks write vs1 directly from xvmaxdp.
632define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
633; PWR9LE-LABEL: v2f64_fast:
634; PWR9LE:       # %bb.0: # %entry
635; PWR9LE-NEXT:    xxswapd vs0, v2
636; PWR9LE-NEXT:    xvmaxdp vs0, v2, vs0
637; PWR9LE-NEXT:    xxswapd vs1, vs0
638; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
639; PWR9LE-NEXT:    blr
640;
641; PWR9BE-LABEL: v2f64_fast:
642; PWR9BE:       # %bb.0: # %entry
643; PWR9BE-NEXT:    xxswapd vs0, v2
644; PWR9BE-NEXT:    xvmaxdp vs1, v2, vs0
645; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
646; PWR9BE-NEXT:    blr
647;
648; PWR10LE-LABEL: v2f64_fast:
649; PWR10LE:       # %bb.0: # %entry
650; PWR10LE-NEXT:    xxswapd vs0, v2
651; PWR10LE-NEXT:    xvmaxdp vs0, v2, vs0
652; PWR10LE-NEXT:    xxswapd vs1, vs0
653; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
654; PWR10LE-NEXT:    blr
655;
656; PWR10BE-LABEL: v2f64_fast:
657; PWR10BE:       # %bb.0: # %entry
658; PWR10BE-NEXT:    xxswapd vs0, v2
659; PWR10BE-NEXT:    xvmaxdp vs1, v2, vs0
660; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
661; PWR10BE-NEXT:    blr
662entry:
663  %0 = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a)
664  ret double %0
665}
666
;; v4f64: reduce a <4 x double> with llvm.vector.reduce.fmax, called without
;; fast-math flags. The expected output combines the two argument registers
;; (v2, v3) with one xvmaxdp, then uses xxswapd and a single xsmaxdp; the LE-
;; and BE-prefixed checks differ only in the operand order of xsmaxdp.
667define dso_local double @v4f64(<4 x double> %a) local_unnamed_addr #0 {
668; PWR9LE-LABEL: v4f64:
669; PWR9LE:       # %bb.0: # %entry
670; PWR9LE-NEXT:    xvmaxdp vs0, v2, v3
671; PWR9LE-NEXT:    xxswapd vs1, vs0
672; PWR9LE-NEXT:    xsmaxdp f1, f1, f0
673; PWR9LE-NEXT:    blr
674;
675; PWR9BE-LABEL: v4f64:
676; PWR9BE:       # %bb.0: # %entry
677; PWR9BE-NEXT:    xvmaxdp vs0, v2, v3
678; PWR9BE-NEXT:    xxswapd vs1, vs0
679; PWR9BE-NEXT:    xsmaxdp f1, f0, f1
680; PWR9BE-NEXT:    blr
681;
682; PWR10LE-LABEL: v4f64:
683; PWR10LE:       # %bb.0: # %entry
684; PWR10LE-NEXT:    xvmaxdp vs0, v2, v3
685; PWR10LE-NEXT:    xxswapd vs1, vs0
686; PWR10LE-NEXT:    xsmaxdp f1, f1, f0
687; PWR10LE-NEXT:    blr
688;
689; PWR10BE-LABEL: v4f64:
690; PWR10BE:       # %bb.0: # %entry
691; PWR10BE-NEXT:    xvmaxdp vs0, v2, v3
692; PWR10BE-NEXT:    xxswapd vs1, vs0
693; PWR10BE-NEXT:    xsmaxdp f1, f0, f1
694; PWR10BE-NEXT:    blr
695entry:
696  %0 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %a)
697  ret double %0
698}
699
;; v4f64_fast: same reduction as v4f64, but the intrinsic call carries the
;; `fast` flag. The expected output uses two xvmaxdp instructions separated by
;; an xxswapd, with no xsmaxdp. The LE-prefixed checks need a trailing xxswapd
;; to place the result in vs1, while the BE-prefixed checks write vs1 directly
;; from the second xvmaxdp.
700define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 {
701; PWR9LE-LABEL: v4f64_fast:
702; PWR9LE:       # %bb.0: # %entry
703; PWR9LE-NEXT:    xvmaxdp vs0, v2, v3
704; PWR9LE-NEXT:    xxswapd vs1, vs0
705; PWR9LE-NEXT:    xvmaxdp vs0, vs0, vs1
706; PWR9LE-NEXT:    xxswapd vs1, vs0
707; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
708; PWR9LE-NEXT:    blr
709;
710; PWR9BE-LABEL: v4f64_fast:
711; PWR9BE:       # %bb.0: # %entry
712; PWR9BE-NEXT:    xvmaxdp vs0, v2, v3
713; PWR9BE-NEXT:    xxswapd vs1, vs0
714; PWR9BE-NEXT:    xvmaxdp vs1, vs0, vs1
715; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
716; PWR9BE-NEXT:    blr
717;
718; PWR10LE-LABEL: v4f64_fast:
719; PWR10LE:       # %bb.0: # %entry
720; PWR10LE-NEXT:    xvmaxdp vs0, v2, v3
721; PWR10LE-NEXT:    xxswapd vs1, vs0
722; PWR10LE-NEXT:    xvmaxdp vs0, vs0, vs1
723; PWR10LE-NEXT:    xxswapd vs1, vs0
724; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
725; PWR10LE-NEXT:    blr
726;
727; PWR10BE-LABEL: v4f64_fast:
728; PWR10BE:       # %bb.0: # %entry
729; PWR10BE-NEXT:    xvmaxdp vs0, v2, v3
730; PWR10BE-NEXT:    xxswapd vs1, vs0
731; PWR10BE-NEXT:    xvmaxdp vs1, vs0, vs1
732; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
733; PWR10BE-NEXT:    blr
734entry:
735  %0 = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %a)
736  ret double %0
737}
738
;; v8f64: reduce an <8 x double> with llvm.vector.reduce.fmax, called without
;; fast-math flags. The expected output combines the four argument registers
;; (v2..v5) with three xvmaxdp instructions, then uses xxswapd and a single
;; xsmaxdp; the LE- and BE-prefixed checks differ only in the operand order of
;; xsmaxdp.
739define dso_local double @v8f64(<8 x double> %a) local_unnamed_addr #0 {
740; PWR9LE-LABEL: v8f64:
741; PWR9LE:       # %bb.0: # %entry
742; PWR9LE-NEXT:    xvmaxdp vs0, v3, v5
743; PWR9LE-NEXT:    xvmaxdp vs1, v2, v4
744; PWR9LE-NEXT:    xvmaxdp vs0, vs1, vs0
745; PWR9LE-NEXT:    xxswapd vs1, vs0
746; PWR9LE-NEXT:    xsmaxdp f1, f1, f0
747; PWR9LE-NEXT:    blr
748;
749; PWR9BE-LABEL: v8f64:
750; PWR9BE:       # %bb.0: # %entry
751; PWR9BE-NEXT:    xvmaxdp vs0, v3, v5
752; PWR9BE-NEXT:    xvmaxdp vs1, v2, v4
753; PWR9BE-NEXT:    xvmaxdp vs0, vs1, vs0
754; PWR9BE-NEXT:    xxswapd vs1, vs0
755; PWR9BE-NEXT:    xsmaxdp f1, f0, f1
756; PWR9BE-NEXT:    blr
757;
758; PWR10LE-LABEL: v8f64:
759; PWR10LE:       # %bb.0: # %entry
760; PWR10LE-NEXT:    xvmaxdp vs0, v3, v5
761; PWR10LE-NEXT:    xvmaxdp vs1, v2, v4
762; PWR10LE-NEXT:    xvmaxdp vs0, vs1, vs0
763; PWR10LE-NEXT:    xxswapd vs1, vs0
764; PWR10LE-NEXT:    xsmaxdp f1, f1, f0
765; PWR10LE-NEXT:    blr
766;
767; PWR10BE-LABEL: v8f64:
768; PWR10BE:       # %bb.0: # %entry
769; PWR10BE-NEXT:    xvmaxdp vs0, v3, v5
770; PWR10BE-NEXT:    xvmaxdp vs1, v2, v4
771; PWR10BE-NEXT:    xvmaxdp vs0, vs1, vs0
772; PWR10BE-NEXT:    xxswapd vs1, vs0
773; PWR10BE-NEXT:    xsmaxdp f1, f0, f1
774; PWR10BE-NEXT:    blr
775entry:
776  %0 = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %a)
777  ret double %0
778}
779
;; v8f64_fast: same reduction as v8f64, but the intrinsic call carries the
;; `fast` flag. The expected output uses four xvmaxdp instructions in total
;; (three to combine v2..v5, then one after xxswapd), with no xsmaxdp. The
;; LE-prefixed checks need a trailing xxswapd to place the result in vs1,
;; while the BE-prefixed checks write vs1 directly from the last xvmaxdp.
780define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 {
781; PWR9LE-LABEL: v8f64_fast:
782; PWR9LE:       # %bb.0: # %entry
783; PWR9LE-NEXT:    xvmaxdp vs0, v3, v5
784; PWR9LE-NEXT:    xvmaxdp vs1, v2, v4
785; PWR9LE-NEXT:    xvmaxdp vs0, vs1, vs0
786; PWR9LE-NEXT:    xxswapd vs1, vs0
787; PWR9LE-NEXT:    xvmaxdp vs0, vs0, vs1
788; PWR9LE-NEXT:    xxswapd vs1, vs0
789; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
790; PWR9LE-NEXT:    blr
791;
792; PWR9BE-LABEL: v8f64_fast:
793; PWR9BE:       # %bb.0: # %entry
794; PWR9BE-NEXT:    xvmaxdp vs0, v3, v5
795; PWR9BE-NEXT:    xvmaxdp vs1, v2, v4
796; PWR9BE-NEXT:    xvmaxdp vs0, vs1, vs0
797; PWR9BE-NEXT:    xxswapd vs1, vs0
798; PWR9BE-NEXT:    xvmaxdp vs1, vs0, vs1
799; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
800; PWR9BE-NEXT:    blr
801;
802; PWR10LE-LABEL: v8f64_fast:
803; PWR10LE:       # %bb.0: # %entry
804; PWR10LE-NEXT:    xvmaxdp vs0, v3, v5
805; PWR10LE-NEXT:    xvmaxdp vs1, v2, v4
806; PWR10LE-NEXT:    xvmaxdp vs0, vs1, vs0
807; PWR10LE-NEXT:    xxswapd vs1, vs0
808; PWR10LE-NEXT:    xvmaxdp vs0, vs0, vs1
809; PWR10LE-NEXT:    xxswapd vs1, vs0
810; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
811; PWR10LE-NEXT:    blr
812;
813; PWR10BE-LABEL: v8f64_fast:
814; PWR10BE:       # %bb.0: # %entry
815; PWR10BE-NEXT:    xvmaxdp vs0, v3, v5
816; PWR10BE-NEXT:    xvmaxdp vs1, v2, v4
817; PWR10BE-NEXT:    xvmaxdp vs0, vs1, vs0
818; PWR10BE-NEXT:    xxswapd vs1, vs0
819; PWR10BE-NEXT:    xvmaxdp vs1, vs0, vs1
820; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
821; PWR10BE-NEXT:    blr
822entry:
823  %0 = call fast double @llvm.vector.reduce.fmax.v8f64(<8 x double> %a)
824  ret double %0
825}
826
;; v16f64: reduce a <16 x double> with llvm.vector.reduce.fmax, called without
;; fast-math flags. The expected output combines the eight argument registers
;; (v2..v9) with seven xvmaxdp instructions, then uses xxswapd and a single
;; xsmaxdp; the LE- and BE-prefixed checks differ only in the operand order of
;; xsmaxdp.
827define dso_local double @v16f64(<16 x double> %a) local_unnamed_addr #0 {
828; PWR9LE-LABEL: v16f64:
829; PWR9LE:       # %bb.0: # %entry
830; PWR9LE-NEXT:    xvmaxdp vs0, v5, v9
831; PWR9LE-NEXT:    xvmaxdp vs1, v3, v7
832; PWR9LE-NEXT:    xvmaxdp vs2, v2, v6
833; PWR9LE-NEXT:    xvmaxdp vs0, vs1, vs0
834; PWR9LE-NEXT:    xvmaxdp vs1, v4, v8
835; PWR9LE-NEXT:    xvmaxdp vs1, vs2, vs1
836; PWR9LE-NEXT:    xvmaxdp vs0, vs1, vs0
837; PWR9LE-NEXT:    xxswapd vs1, vs0
838; PWR9LE-NEXT:    xsmaxdp f1, f1, f0
839; PWR9LE-NEXT:    blr
840;
841; PWR9BE-LABEL: v16f64:
842; PWR9BE:       # %bb.0: # %entry
843; PWR9BE-NEXT:    xvmaxdp vs0, v5, v9
844; PWR9BE-NEXT:    xvmaxdp vs1, v3, v7
845; PWR9BE-NEXT:    xvmaxdp vs2, v2, v6
846; PWR9BE-NEXT:    xvmaxdp vs0, vs1, vs0
847; PWR9BE-NEXT:    xvmaxdp vs1, v4, v8
848; PWR9BE-NEXT:    xvmaxdp vs1, vs2, vs1
849; PWR9BE-NEXT:    xvmaxdp vs0, vs1, vs0
850; PWR9BE-NEXT:    xxswapd vs1, vs0
851; PWR9BE-NEXT:    xsmaxdp f1, f0, f1
852; PWR9BE-NEXT:    blr
853;
854; PWR10LE-LABEL: v16f64:
855; PWR10LE:       # %bb.0: # %entry
856; PWR10LE-NEXT:    xvmaxdp vs0, v5, v9
857; PWR10LE-NEXT:    xvmaxdp vs1, v3, v7
858; PWR10LE-NEXT:    xvmaxdp vs2, v2, v6
859; PWR10LE-NEXT:    xvmaxdp vs0, vs1, vs0
860; PWR10LE-NEXT:    xvmaxdp vs1, v4, v8
861; PWR10LE-NEXT:    xvmaxdp vs1, vs2, vs1
862; PWR10LE-NEXT:    xvmaxdp vs0, vs1, vs0
863; PWR10LE-NEXT:    xxswapd vs1, vs0
864; PWR10LE-NEXT:    xsmaxdp f1, f1, f0
865; PWR10LE-NEXT:    blr
866;
867; PWR10BE-LABEL: v16f64:
868; PWR10BE:       # %bb.0: # %entry
869; PWR10BE-NEXT:    xvmaxdp vs0, v5, v9
870; PWR10BE-NEXT:    xvmaxdp vs1, v3, v7
871; PWR10BE-NEXT:    xvmaxdp vs2, v2, v6
872; PWR10BE-NEXT:    xvmaxdp vs0, vs1, vs0
873; PWR10BE-NEXT:    xvmaxdp vs1, v4, v8
874; PWR10BE-NEXT:    xvmaxdp vs1, vs2, vs1
875; PWR10BE-NEXT:    xvmaxdp vs0, vs1, vs0
876; PWR10BE-NEXT:    xxswapd vs1, vs0
877; PWR10BE-NEXT:    xsmaxdp f1, f0, f1
878; PWR10BE-NEXT:    blr
879entry:
880  %0 = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> %a)
881  ret double %0
882}
883
; v16f64_fast: fmax reduction of a <16 x double> argument where the intrinsic
; call carries the `fast` flag. The expected code for all four check prefixes
; (PWR9LE, PWR9BE, PWR10LE, PWR10BE) combines the eight input registers v2-v9
; with xvmaxdp, then swaps the doublewords (xxswapd) and applies one more
; vector xvmaxdp; no scalar xsmaxdp appears in these sequences. The
; little-endian sequences end with an additional xxswapd into vs1, while the
; big-endian ones write the final xvmaxdp result straight into vs1.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
884define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 {
885; PWR9LE-LABEL: v16f64_fast:
886; PWR9LE:       # %bb.0: # %entry
887; PWR9LE-NEXT:    xvmaxdp vs0, v4, v8
888; PWR9LE-NEXT:    xvmaxdp vs1, v2, v6
889; PWR9LE-NEXT:    xvmaxdp vs2, v5, v9
890; PWR9LE-NEXT:    xvmaxdp vs3, v3, v7
891; PWR9LE-NEXT:    xvmaxdp vs2, vs3, vs2
892; PWR9LE-NEXT:    xvmaxdp vs0, vs1, vs0
893; PWR9LE-NEXT:    xvmaxdp vs0, vs0, vs2
894; PWR9LE-NEXT:    xxswapd vs1, vs0
895; PWR9LE-NEXT:    xvmaxdp vs0, vs0, vs1
896; PWR9LE-NEXT:    xxswapd vs1, vs0
897; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
898; PWR9LE-NEXT:    blr
899;
900; PWR9BE-LABEL: v16f64_fast:
901; PWR9BE:       # %bb.0: # %entry
902; PWR9BE-NEXT:    xvmaxdp vs0, v4, v8
903; PWR9BE-NEXT:    xvmaxdp vs1, v2, v6
904; PWR9BE-NEXT:    xvmaxdp vs2, v5, v9
905; PWR9BE-NEXT:    xvmaxdp vs3, v3, v7
906; PWR9BE-NEXT:    xvmaxdp vs2, vs3, vs2
907; PWR9BE-NEXT:    xvmaxdp vs0, vs1, vs0
908; PWR9BE-NEXT:    xvmaxdp vs0, vs0, vs2
909; PWR9BE-NEXT:    xxswapd vs1, vs0
910; PWR9BE-NEXT:    xvmaxdp vs1, vs0, vs1
911; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
912; PWR9BE-NEXT:    blr
913;
914; PWR10LE-LABEL: v16f64_fast:
915; PWR10LE:       # %bb.0: # %entry
916; PWR10LE-NEXT:    xvmaxdp vs0, v4, v8
917; PWR10LE-NEXT:    xvmaxdp vs1, v2, v6
918; PWR10LE-NEXT:    xvmaxdp vs2, v5, v9
919; PWR10LE-NEXT:    xvmaxdp vs3, v3, v7
920; PWR10LE-NEXT:    xvmaxdp vs2, vs3, vs2
921; PWR10LE-NEXT:    xvmaxdp vs0, vs1, vs0
922; PWR10LE-NEXT:    xvmaxdp vs0, vs0, vs2
923; PWR10LE-NEXT:    xxswapd vs1, vs0
924; PWR10LE-NEXT:    xvmaxdp vs0, vs0, vs1
925; PWR10LE-NEXT:    xxswapd vs1, vs0
926; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
927; PWR10LE-NEXT:    blr
928;
929; PWR10BE-LABEL: v16f64_fast:
930; PWR10BE:       # %bb.0: # %entry
931; PWR10BE-NEXT:    xvmaxdp vs0, v4, v8
932; PWR10BE-NEXT:    xvmaxdp vs1, v2, v6
933; PWR10BE-NEXT:    xvmaxdp vs2, v5, v9
934; PWR10BE-NEXT:    xvmaxdp vs3, v3, v7
935; PWR10BE-NEXT:    xvmaxdp vs2, vs3, vs2
936; PWR10BE-NEXT:    xvmaxdp vs0, vs1, vs0
937; PWR10BE-NEXT:    xvmaxdp vs0, vs0, vs2
938; PWR10BE-NEXT:    xxswapd vs1, vs0
939; PWR10BE-NEXT:    xvmaxdp vs1, vs0, vs1
940; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
941; PWR10BE-NEXT:    blr
942entry:
  ; Reduce all 16 lanes of %a to one double; `fast` is the only difference
  ; between this call and a call to the same intrinsic without flags.
943  %0 = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> %a)
944  ret double %0
945}
946
; v32f64: fmax reduction of a <32 x double> argument with no fast-math flags
; on the call. In the expected code twelve of the sixteen 128-bit pieces are
; read from v2-v13 and the other four are loaded with lxv from r1-relative
; offsets (224/240/256/272 in the little-endian sequences, 240/256/272/288 in
; the big-endian ones). The pieces are combined pairwise with xvmaxdp, and the
; final two doublewords are combined with xxswapd followed by a scalar
; xsmaxdp, whose source operand order differs between LE (f1, f0) and BE
; (f0, f1). The pwr10 sequences contain the same instructions as the pwr9
; ones; only the placement of the lxv loads among the xvmaxdp differs.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
947define dso_local double @v32f64(<32 x double> %a) local_unnamed_addr #0 {
948; PWR9LE-LABEL: v32f64:
949; PWR9LE:       # %bb.0: # %entry
950; PWR9LE-NEXT:    lxv vs3, 272(r1)
951; PWR9LE-NEXT:    lxv vs2, 240(r1)
952; PWR9LE-NEXT:    xvmaxdp vs4, v5, v13
953; PWR9LE-NEXT:    lxv vs1, 256(r1)
954; PWR9LE-NEXT:    lxv vs0, 224(r1)
955; PWR9LE-NEXT:    xvmaxdp vs3, v9, vs3
956; PWR9LE-NEXT:    xvmaxdp vs2, v7, vs2
957; PWR9LE-NEXT:    xvmaxdp vs1, v8, vs1
958; PWR9LE-NEXT:    xvmaxdp vs0, v6, vs0
959; PWR9LE-NEXT:    xvmaxdp vs3, vs4, vs3
960; PWR9LE-NEXT:    xvmaxdp vs4, v3, v11
961; PWR9LE-NEXT:    xvmaxdp vs2, vs4, vs2
962; PWR9LE-NEXT:    xvmaxdp vs2, vs2, vs3
963; PWR9LE-NEXT:    xvmaxdp vs3, v4, v12
964; PWR9LE-NEXT:    xvmaxdp vs1, vs3, vs1
965; PWR9LE-NEXT:    xvmaxdp vs3, v2, v10
966; PWR9LE-NEXT:    xvmaxdp vs0, vs3, vs0
967; PWR9LE-NEXT:    xvmaxdp vs0, vs0, vs1
968; PWR9LE-NEXT:    xvmaxdp vs0, vs0, vs2
969; PWR9LE-NEXT:    xxswapd vs1, vs0
970; PWR9LE-NEXT:    xsmaxdp f1, f1, f0
971; PWR9LE-NEXT:    blr
972;
973; PWR9BE-LABEL: v32f64:
974; PWR9BE:       # %bb.0: # %entry
975; PWR9BE-NEXT:    lxv vs3, 288(r1)
976; PWR9BE-NEXT:    lxv vs2, 256(r1)
977; PWR9BE-NEXT:    xvmaxdp vs4, v5, v13
978; PWR9BE-NEXT:    lxv vs1, 272(r1)
979; PWR9BE-NEXT:    lxv vs0, 240(r1)
980; PWR9BE-NEXT:    xvmaxdp vs3, v9, vs3
981; PWR9BE-NEXT:    xvmaxdp vs2, v7, vs2
982; PWR9BE-NEXT:    xvmaxdp vs1, v8, vs1
983; PWR9BE-NEXT:    xvmaxdp vs0, v6, vs0
984; PWR9BE-NEXT:    xvmaxdp vs3, vs4, vs3
985; PWR9BE-NEXT:    xvmaxdp vs4, v3, v11
986; PWR9BE-NEXT:    xvmaxdp vs2, vs4, vs2
987; PWR9BE-NEXT:    xvmaxdp vs2, vs2, vs3
988; PWR9BE-NEXT:    xvmaxdp vs3, v4, v12
989; PWR9BE-NEXT:    xvmaxdp vs1, vs3, vs1
990; PWR9BE-NEXT:    xvmaxdp vs3, v2, v10
991; PWR9BE-NEXT:    xvmaxdp vs0, vs3, vs0
992; PWR9BE-NEXT:    xvmaxdp vs0, vs0, vs1
993; PWR9BE-NEXT:    xvmaxdp vs0, vs0, vs2
994; PWR9BE-NEXT:    xxswapd vs1, vs0
995; PWR9BE-NEXT:    xsmaxdp f1, f0, f1
996; PWR9BE-NEXT:    blr
997;
998; PWR10LE-LABEL: v32f64:
999; PWR10LE:       # %bb.0: # %entry
1000; PWR10LE-NEXT:    lxv vs3, 272(r1)
1001; PWR10LE-NEXT:    lxv vs2, 240(r1)
1002; PWR10LE-NEXT:    xvmaxdp vs4, v5, v13
1003; PWR10LE-NEXT:    xvmaxdp vs3, v9, vs3
1004; PWR10LE-NEXT:    lxv vs1, 256(r1)
1005; PWR10LE-NEXT:    xvmaxdp vs2, v7, vs2
1006; PWR10LE-NEXT:    lxv vs0, 224(r1)
1007; PWR10LE-NEXT:    xvmaxdp vs1, v8, vs1
1008; PWR10LE-NEXT:    xvmaxdp vs0, v6, vs0
1009; PWR10LE-NEXT:    xvmaxdp vs3, vs4, vs3
1010; PWR10LE-NEXT:    xvmaxdp vs4, v3, v11
1011; PWR10LE-NEXT:    xvmaxdp vs2, vs4, vs2
1012; PWR10LE-NEXT:    xvmaxdp vs2, vs2, vs3
1013; PWR10LE-NEXT:    xvmaxdp vs3, v4, v12
1014; PWR10LE-NEXT:    xvmaxdp vs1, vs3, vs1
1015; PWR10LE-NEXT:    xvmaxdp vs3, v2, v10
1016; PWR10LE-NEXT:    xvmaxdp vs0, vs3, vs0
1017; PWR10LE-NEXT:    xvmaxdp vs0, vs0, vs1
1018; PWR10LE-NEXT:    xvmaxdp vs0, vs0, vs2
1019; PWR10LE-NEXT:    xxswapd vs1, vs0
1020; PWR10LE-NEXT:    xsmaxdp f1, f1, f0
1021; PWR10LE-NEXT:    blr
1022;
1023; PWR10BE-LABEL: v32f64:
1024; PWR10BE:       # %bb.0: # %entry
1025; PWR10BE-NEXT:    lxv vs3, 288(r1)
1026; PWR10BE-NEXT:    lxv vs2, 256(r1)
1027; PWR10BE-NEXT:    xvmaxdp vs4, v5, v13
1028; PWR10BE-NEXT:    xvmaxdp vs3, v9, vs3
1029; PWR10BE-NEXT:    lxv vs1, 272(r1)
1030; PWR10BE-NEXT:    xvmaxdp vs2, v7, vs2
1031; PWR10BE-NEXT:    lxv vs0, 240(r1)
1032; PWR10BE-NEXT:    xvmaxdp vs1, v8, vs1
1033; PWR10BE-NEXT:    xvmaxdp vs0, v6, vs0
1034; PWR10BE-NEXT:    xvmaxdp vs3, vs4, vs3
1035; PWR10BE-NEXT:    xvmaxdp vs4, v3, v11
1036; PWR10BE-NEXT:    xvmaxdp vs2, vs4, vs2
1037; PWR10BE-NEXT:    xvmaxdp vs2, vs2, vs3
1038; PWR10BE-NEXT:    xvmaxdp vs3, v4, v12
1039; PWR10BE-NEXT:    xvmaxdp vs1, vs3, vs1
1040; PWR10BE-NEXT:    xvmaxdp vs3, v2, v10
1041; PWR10BE-NEXT:    xvmaxdp vs0, vs3, vs0
1042; PWR10BE-NEXT:    xvmaxdp vs0, vs0, vs1
1043; PWR10BE-NEXT:    xvmaxdp vs0, vs0, vs2
1044; PWR10BE-NEXT:    xxswapd vs1, vs0
1045; PWR10BE-NEXT:    xsmaxdp f1, f0, f1
1046; PWR10BE-NEXT:    blr
1047entry:
  ; Reduce all 32 lanes of %a to one double; the call has no fast-math flags.
1048  %0 = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %a)
1049  ret double %0
1050}
1051
; v32f64_fast: fmax reduction of a <32 x double> argument where the intrinsic
; call carries the `fast` flag. As in the expected code for the unflagged
; 32-element case, twelve 128-bit pieces are read from v2-v13 and four are
; loaded with lxv from r1-relative offsets (224-272 for the little-endian
; sequences, 240-288 for the big-endian ones). Unlike that case, the last step
; here is xxswapd followed by a vector xvmaxdp rather than a scalar xsmaxdp;
; the little-endian sequences then issue one more xxswapd into vs1, while the
; big-endian ones write the final xvmaxdp result straight into vs1. The pwr10
; sequences contain the same instructions as the pwr9 ones with the lxv loads
; placed differently among the xvmaxdp.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them rather than editing them by hand.
1052define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
1053; PWR9LE-LABEL: v32f64_fast:
1054; PWR9LE:       # %bb.0: # %entry
1055; PWR9LE-NEXT:    lxv vs0, 256(r1)
1056; PWR9LE-NEXT:    lxv vs1, 224(r1)
1057; PWR9LE-NEXT:    lxv vs2, 272(r1)
1058; PWR9LE-NEXT:    lxv vs3, 240(r1)
1059; PWR9LE-NEXT:    xvmaxdp vs4, v3, v11
1060; PWR9LE-NEXT:    xvmaxdp vs5, v5, v13
1061; PWR9LE-NEXT:    xvmaxdp vs6, v2, v10
1062; PWR9LE-NEXT:    xvmaxdp vs7, v4, v12
1063; PWR9LE-NEXT:    xvmaxdp vs3, v7, vs3
1064; PWR9LE-NEXT:    xvmaxdp vs2, v9, vs2
1065; PWR9LE-NEXT:    xvmaxdp vs1, v6, vs1
1066; PWR9LE-NEXT:    xvmaxdp vs0, v8, vs0
1067; PWR9LE-NEXT:    xvmaxdp vs0, vs7, vs0
1068; PWR9LE-NEXT:    xvmaxdp vs1, vs6, vs1
1069; PWR9LE-NEXT:    xvmaxdp vs2, vs5, vs2
1070; PWR9LE-NEXT:    xvmaxdp vs3, vs4, vs3
1071; PWR9LE-NEXT:    xvmaxdp vs2, vs3, vs2
1072; PWR9LE-NEXT:    xvmaxdp vs0, vs1, vs0
1073; PWR9LE-NEXT:    xvmaxdp vs0, vs0, vs2
1074; PWR9LE-NEXT:    xxswapd vs1, vs0
1075; PWR9LE-NEXT:    xvmaxdp vs0, vs0, vs1
1076; PWR9LE-NEXT:    xxswapd vs1, vs0
1077; PWR9LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
1078; PWR9LE-NEXT:    blr
1079;
1080; PWR9BE-LABEL: v32f64_fast:
1081; PWR9BE:       # %bb.0: # %entry
1082; PWR9BE-NEXT:    lxv vs0, 272(r1)
1083; PWR9BE-NEXT:    lxv vs1, 240(r1)
1084; PWR9BE-NEXT:    lxv vs2, 288(r1)
1085; PWR9BE-NEXT:    lxv vs3, 256(r1)
1086; PWR9BE-NEXT:    xvmaxdp vs4, v3, v11
1087; PWR9BE-NEXT:    xvmaxdp vs5, v5, v13
1088; PWR9BE-NEXT:    xvmaxdp vs6, v2, v10
1089; PWR9BE-NEXT:    xvmaxdp vs7, v4, v12
1090; PWR9BE-NEXT:    xvmaxdp vs3, v7, vs3
1091; PWR9BE-NEXT:    xvmaxdp vs2, v9, vs2
1092; PWR9BE-NEXT:    xvmaxdp vs1, v6, vs1
1093; PWR9BE-NEXT:    xvmaxdp vs0, v8, vs0
1094; PWR9BE-NEXT:    xvmaxdp vs0, vs7, vs0
1095; PWR9BE-NEXT:    xvmaxdp vs1, vs6, vs1
1096; PWR9BE-NEXT:    xvmaxdp vs2, vs5, vs2
1097; PWR9BE-NEXT:    xvmaxdp vs3, vs4, vs3
1098; PWR9BE-NEXT:    xvmaxdp vs2, vs3, vs2
1099; PWR9BE-NEXT:    xvmaxdp vs0, vs1, vs0
1100; PWR9BE-NEXT:    xvmaxdp vs0, vs0, vs2
1101; PWR9BE-NEXT:    xxswapd vs1, vs0
1102; PWR9BE-NEXT:    xvmaxdp vs1, vs0, vs1
1103; PWR9BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
1104; PWR9BE-NEXT:    blr
1105;
1106; PWR10LE-LABEL: v32f64_fast:
1107; PWR10LE:       # %bb.0: # %entry
1108; PWR10LE-NEXT:    lxv vs0, 256(r1)
1109; PWR10LE-NEXT:    lxv vs1, 224(r1)
1110; PWR10LE-NEXT:    xvmaxdp vs4, v3, v11
1111; PWR10LE-NEXT:    xvmaxdp vs5, v5, v13
1112; PWR10LE-NEXT:    xvmaxdp vs6, v2, v10
1113; PWR10LE-NEXT:    xvmaxdp vs7, v4, v12
1114; PWR10LE-NEXT:    xvmaxdp vs1, v6, vs1
1115; PWR10LE-NEXT:    lxv vs2, 272(r1)
1116; PWR10LE-NEXT:    lxv vs3, 240(r1)
1117; PWR10LE-NEXT:    xvmaxdp vs3, v7, vs3
1118; PWR10LE-NEXT:    xvmaxdp vs2, v9, vs2
1119; PWR10LE-NEXT:    xvmaxdp vs0, v8, vs0
1120; PWR10LE-NEXT:    xvmaxdp vs0, vs7, vs0
1121; PWR10LE-NEXT:    xvmaxdp vs1, vs6, vs1
1122; PWR10LE-NEXT:    xvmaxdp vs2, vs5, vs2
1123; PWR10LE-NEXT:    xvmaxdp vs3, vs4, vs3
1124; PWR10LE-NEXT:    xvmaxdp vs2, vs3, vs2
1125; PWR10LE-NEXT:    xvmaxdp vs0, vs1, vs0
1126; PWR10LE-NEXT:    xvmaxdp vs0, vs0, vs2
1127; PWR10LE-NEXT:    xxswapd vs1, vs0
1128; PWR10LE-NEXT:    xvmaxdp vs0, vs0, vs1
1129; PWR10LE-NEXT:    xxswapd vs1, vs0
1130; PWR10LE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
1131; PWR10LE-NEXT:    blr
1132;
1133; PWR10BE-LABEL: v32f64_fast:
1134; PWR10BE:       # %bb.0: # %entry
1135; PWR10BE-NEXT:    lxv vs0, 272(r1)
1136; PWR10BE-NEXT:    lxv vs1, 240(r1)
1137; PWR10BE-NEXT:    xvmaxdp vs4, v3, v11
1138; PWR10BE-NEXT:    xvmaxdp vs5, v5, v13
1139; PWR10BE-NEXT:    xvmaxdp vs6, v2, v10
1140; PWR10BE-NEXT:    xvmaxdp vs7, v4, v12
1141; PWR10BE-NEXT:    xvmaxdp vs1, v6, vs1
1142; PWR10BE-NEXT:    lxv vs2, 288(r1)
1143; PWR10BE-NEXT:    lxv vs3, 256(r1)
1144; PWR10BE-NEXT:    xvmaxdp vs3, v7, vs3
1145; PWR10BE-NEXT:    xvmaxdp vs2, v9, vs2
1146; PWR10BE-NEXT:    xvmaxdp vs0, v8, vs0
1147; PWR10BE-NEXT:    xvmaxdp vs0, vs7, vs0
1148; PWR10BE-NEXT:    xvmaxdp vs1, vs6, vs1
1149; PWR10BE-NEXT:    xvmaxdp vs2, vs5, vs2
1150; PWR10BE-NEXT:    xvmaxdp vs3, vs4, vs3
1151; PWR10BE-NEXT:    xvmaxdp vs2, vs3, vs2
1152; PWR10BE-NEXT:    xvmaxdp vs0, vs1, vs0
1153; PWR10BE-NEXT:    xvmaxdp vs0, vs0, vs2
1154; PWR10BE-NEXT:    xxswapd vs1, vs0
1155; PWR10BE-NEXT:    xvmaxdp vs1, vs0, vs1
1156; PWR10BE-NEXT:    # kill: def $f1 killed $f1 killed $vsl1
1157; PWR10BE-NEXT:    blr
1158entry:
  ; Reduce all 32 lanes of %a to one double; `fast` is the only difference
  ; between this call and a call to the same intrinsic without flags.
1159  %0 = call fast double @llvm.vector.reduce.fmax.v32f64(<32 x double> %a)
1160  ret double %0
1161}
1162
; Declarations of the fmax reduction intrinsic for each f64 vector width
; (2, 4, 8, 16 and 32 elements). Each takes one vector operand and returns a
; scalar double, and each is tagged with attribute group #0.
1163declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) #0
1164declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) #0
1165declare double @llvm.vector.reduce.fmax.v8f64(<8 x double>) #0
1166declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>) #0
1167declare double @llvm.vector.reduce.fmax.v32f64(<32 x double>) #0
1168
; Attribute group #0 contains only `nounwind`.
1169attributes #0 = { nounwind }
1170