1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64-unknown-linux -mcpu=pwr8 | FileCheck %s
3; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr9 | FileCheck %s
4; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc64le-unknown-linux -mcpu=pwr8 -mattr=-vsx | FileCheck %s -check-prefix=NOVSX
5; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s -mtriple=powerpc-unknown-linux -mattr=spe | FileCheck %s -check-prefix=SPE
6
7declare float @llvm.experimental.constrained.fadd.f32(float, float, metadata, metadata)
8declare double @llvm.experimental.constrained.fadd.f64(double, double, metadata, metadata)
9declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
10declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)
11
12declare float @llvm.experimental.constrained.fsub.f32(float, float, metadata, metadata)
13declare double @llvm.experimental.constrained.fsub.f64(double, double, metadata, metadata)
14declare <4 x float> @llvm.experimental.constrained.fsub.v4f32(<4 x float>, <4 x float>, metadata, metadata)
15declare <2 x double> @llvm.experimental.constrained.fsub.v2f64(<2 x double>, <2 x double>, metadata, metadata)
16
17declare float @llvm.experimental.constrained.fmul.f32(float, float, metadata, metadata)
18declare double @llvm.experimental.constrained.fmul.f64(double, double, metadata, metadata)
19declare <4 x float> @llvm.experimental.constrained.fmul.v4f32(<4 x float>, <4 x float>, metadata, metadata)
20declare <2 x double> @llvm.experimental.constrained.fmul.v2f64(<2 x double>, <2 x double>, metadata, metadata)
21
22declare float @llvm.experimental.constrained.fdiv.f32(float, float, metadata, metadata)
23declare double @llvm.experimental.constrained.fdiv.f64(double, double, metadata, metadata)
24declare <4 x float> @llvm.experimental.constrained.fdiv.v4f32(<4 x float>, <4 x float>, metadata, metadata)
25declare <2 x double> @llvm.experimental.constrained.fdiv.v2f64(<2 x double>, <2 x double>, metadata, metadata)
26
27declare float @llvm.experimental.constrained.fma.f32(float, float, float, metadata, metadata)
28declare double @llvm.experimental.constrained.fma.f64(double, double, double, metadata, metadata)
29declare <4 x float> @llvm.experimental.constrained.fma.v4f32(<4 x float>, <4 x float>, <4 x float>, metadata, metadata)
30declare <2 x double> @llvm.experimental.constrained.fma.v2f64(<2 x double>, <2 x double>, <2 x double>, metadata, metadata)
31
32declare float @llvm.experimental.constrained.sqrt.f32(float, metadata, metadata)
33declare double @llvm.experimental.constrained.sqrt.f64(double, metadata, metadata)
34declare <4 x float> @llvm.experimental.constrained.sqrt.v4f32(<4 x float>, metadata, metadata)
35declare <2 x double> @llvm.experimental.constrained.sqrt.v2f64(<2 x double>, metadata, metadata)
36
37define float @fadd_f32(float %f1, float %f2) #0 {
38; CHECK-LABEL: fadd_f32:
39; CHECK:       # %bb.0:
40; CHECK-NEXT:    xsaddsp f1, f1, f2
41; CHECK-NEXT:    blr
42;
43; NOVSX-LABEL: fadd_f32:
44; NOVSX:       # %bb.0:
45; NOVSX-NEXT:    fadds f1, f1, f2
46; NOVSX-NEXT:    blr
47;
48; SPE-LABEL: fadd_f32:
49; SPE:       # %bb.0:
50; SPE-NEXT:    efsadd r3, r3, r4
51; SPE-NEXT:    blr
52  %res = call float @llvm.experimental.constrained.fadd.f32(
53                        float %f1, float %f2,
54                        metadata !"round.dynamic",
55                        metadata !"fpexcept.strict") #0
56  ret float %res
57}
58
59define double @fadd_f64(double %f1, double %f2) #0 {
60; CHECK-LABEL: fadd_f64:
61; CHECK:       # %bb.0:
62; CHECK-NEXT:    xsadddp f1, f1, f2
63; CHECK-NEXT:    blr
64;
65; NOVSX-LABEL: fadd_f64:
66; NOVSX:       # %bb.0:
67; NOVSX-NEXT:    fadd f1, f1, f2
68; NOVSX-NEXT:    blr
69;
70; SPE-LABEL: fadd_f64:
71; SPE:       # %bb.0:
72; SPE-NEXT:    evmergelo r5, r5, r6
73; SPE-NEXT:    evmergelo r3, r3, r4
74; SPE-NEXT:    efdadd r4, r3, r5
75; SPE-NEXT:    evmergehi r3, r4, r4
76; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
77; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
78; SPE-NEXT:    blr
79  %res = call double @llvm.experimental.constrained.fadd.f64(
80                        double %f1, double %f2,
81                        metadata !"round.dynamic",
82                        metadata !"fpexcept.strict") #0
83  ret double %res
84}
85
86define <4 x float> @fadd_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
87; CHECK-LABEL: fadd_v4f32:
88; CHECK:       # %bb.0:
89; CHECK-NEXT:    xvaddsp v2, v2, v3
90; CHECK-NEXT:    blr
91;
92; NOVSX-LABEL: fadd_v4f32:
93; NOVSX:       # %bb.0:
94; NOVSX-NEXT:    addi r3, r1, -32
95; NOVSX-NEXT:    addi r4, r1, -48
96; NOVSX-NEXT:    stvx v3, 0, r3
97; NOVSX-NEXT:    stvx v2, 0, r4
98; NOVSX-NEXT:    addi r3, r1, -16
99; NOVSX-NEXT:    lfs f0, -20(r1)
100; NOVSX-NEXT:    lfs f1, -36(r1)
101; NOVSX-NEXT:    fadds f0, f1, f0
102; NOVSX-NEXT:    lfs f1, -40(r1)
103; NOVSX-NEXT:    stfs f0, -4(r1)
104; NOVSX-NEXT:    lfs f0, -24(r1)
105; NOVSX-NEXT:    fadds f0, f1, f0
106; NOVSX-NEXT:    lfs f1, -44(r1)
107; NOVSX-NEXT:    stfs f0, -8(r1)
108; NOVSX-NEXT:    lfs f0, -28(r1)
109; NOVSX-NEXT:    fadds f0, f1, f0
110; NOVSX-NEXT:    lfs f1, -48(r1)
111; NOVSX-NEXT:    stfs f0, -12(r1)
112; NOVSX-NEXT:    lfs f0, -32(r1)
113; NOVSX-NEXT:    fadds f0, f1, f0
114; NOVSX-NEXT:    stfs f0, -16(r1)
115; NOVSX-NEXT:    lvx v2, 0, r3
116; NOVSX-NEXT:    blr
117;
118; SPE-LABEL: fadd_v4f32:
119; SPE:       # %bb.0:
120; SPE-NEXT:    efsadd r6, r6, r10
121; SPE-NEXT:    efsadd r5, r5, r9
122; SPE-NEXT:    efsadd r4, r4, r8
123; SPE-NEXT:    efsadd r3, r3, r7
124; SPE-NEXT:    blr
125  %res = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(
126                        <4 x float> %vf1, <4 x float> %vf2,
127                        metadata !"round.dynamic",
128                        metadata !"fpexcept.strict") #0
129  ret <4 x float> %res
130}
131
132define <2 x double> @fadd_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
133; CHECK-LABEL: fadd_v2f64:
134; CHECK:       # %bb.0:
135; CHECK-NEXT:    xvadddp v2, v2, v3
136; CHECK-NEXT:    blr
137;
138; NOVSX-LABEL: fadd_v2f64:
139; NOVSX:       # %bb.0:
140; NOVSX-NEXT:    fadd f2, f2, f4
141; NOVSX-NEXT:    fadd f1, f1, f3
142; NOVSX-NEXT:    blr
143;
144; SPE-LABEL: fadd_v2f64:
145; SPE:       # %bb.0:
146; SPE-NEXT:    evldd r4, 8(r1)
147; SPE-NEXT:    evmergelo r7, r7, r8
148; SPE-NEXT:    evmergelo r8, r9, r10
149; SPE-NEXT:    li r9, 8
150; SPE-NEXT:    evmergelo r5, r5, r6
151; SPE-NEXT:    efdadd r4, r7, r4
152; SPE-NEXT:    evstddx r4, r3, r9
153; SPE-NEXT:    efdadd r4, r5, r8
154; SPE-NEXT:    evstdd r4, 0(r3)
155; SPE-NEXT:    blr
156  %res = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(
157                        <2 x double> %vf1, <2 x double> %vf2,
158                        metadata !"round.dynamic",
159                        metadata !"fpexcept.strict") #0
160  ret <2 x double> %res
161}
162
163define float @fsub_f32(float %f1, float %f2) #0 {
164; CHECK-LABEL: fsub_f32:
165; CHECK:       # %bb.0:
166; CHECK-NEXT:    xssubsp f1, f1, f2
167; CHECK-NEXT:    blr
168;
169; NOVSX-LABEL: fsub_f32:
170; NOVSX:       # %bb.0:
171; NOVSX-NEXT:    fsubs f1, f1, f2
172; NOVSX-NEXT:    blr
173;
174; SPE-LABEL: fsub_f32:
175; SPE:       # %bb.0:
176; SPE-NEXT:    efssub r3, r3, r4
177; SPE-NEXT:    blr
178
179  %res = call float @llvm.experimental.constrained.fsub.f32(
180                        float %f1, float %f2,
181                        metadata !"round.dynamic",
182                        metadata !"fpexcept.strict") #0
183  ret float %res;
184}
185
186define double @fsub_f64(double %f1, double %f2) #0 {
187; CHECK-LABEL: fsub_f64:
188; CHECK:       # %bb.0:
189; CHECK-NEXT:    xssubdp f1, f1, f2
190; CHECK-NEXT:    blr
191;
192; NOVSX-LABEL: fsub_f64:
193; NOVSX:       # %bb.0:
194; NOVSX-NEXT:    fsub f1, f1, f2
195; NOVSX-NEXT:    blr
196;
197; SPE-LABEL: fsub_f64:
198; SPE:       # %bb.0:
199; SPE-NEXT:    evmergelo r5, r5, r6
200; SPE-NEXT:    evmergelo r3, r3, r4
201; SPE-NEXT:    efdsub r4, r3, r5
202; SPE-NEXT:    evmergehi r3, r4, r4
203; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
204; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
205; SPE-NEXT:    blr
206
207  %res = call double @llvm.experimental.constrained.fsub.f64(
208                        double %f1, double %f2,
209                        metadata !"round.dynamic",
210                        metadata !"fpexcept.strict") #0
211  ret double %res;
212}
213
214define <4 x float> @fsub_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
215; CHECK-LABEL: fsub_v4f32:
216; CHECK:       # %bb.0:
217; CHECK-NEXT:    xvsubsp v2, v2, v3
218; CHECK-NEXT:    blr
219;
220; NOVSX-LABEL: fsub_v4f32:
221; NOVSX:       # %bb.0:
222; NOVSX-NEXT:    addi r3, r1, -32
223; NOVSX-NEXT:    addi r4, r1, -48
224; NOVSX-NEXT:    stvx v3, 0, r3
225; NOVSX-NEXT:    stvx v2, 0, r4
226; NOVSX-NEXT:    addi r3, r1, -16
227; NOVSX-NEXT:    lfs f0, -20(r1)
228; NOVSX-NEXT:    lfs f1, -36(r1)
229; NOVSX-NEXT:    fsubs f0, f1, f0
230; NOVSX-NEXT:    lfs f1, -40(r1)
231; NOVSX-NEXT:    stfs f0, -4(r1)
232; NOVSX-NEXT:    lfs f0, -24(r1)
233; NOVSX-NEXT:    fsubs f0, f1, f0
234; NOVSX-NEXT:    lfs f1, -44(r1)
235; NOVSX-NEXT:    stfs f0, -8(r1)
236; NOVSX-NEXT:    lfs f0, -28(r1)
237; NOVSX-NEXT:    fsubs f0, f1, f0
238; NOVSX-NEXT:    lfs f1, -48(r1)
239; NOVSX-NEXT:    stfs f0, -12(r1)
240; NOVSX-NEXT:    lfs f0, -32(r1)
241; NOVSX-NEXT:    fsubs f0, f1, f0
242; NOVSX-NEXT:    stfs f0, -16(r1)
243; NOVSX-NEXT:    lvx v2, 0, r3
244; NOVSX-NEXT:    blr
245;
246; SPE-LABEL: fsub_v4f32:
247; SPE:       # %bb.0:
248; SPE-NEXT:    efssub r6, r6, r10
249; SPE-NEXT:    efssub r5, r5, r9
250; SPE-NEXT:    efssub r4, r4, r8
251; SPE-NEXT:    efssub r3, r3, r7
252; SPE-NEXT:    blr
253  %res = call <4 x float> @llvm.experimental.constrained.fsub.v4f32(
254                        <4 x float> %vf1, <4 x float> %vf2,
255                        metadata !"round.dynamic",
256                        metadata !"fpexcept.strict") #0
257  ret <4 x float> %res;
258}
259
260define <2 x double> @fsub_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
261; CHECK-LABEL: fsub_v2f64:
262; CHECK:       # %bb.0:
263; CHECK-NEXT:    xvsubdp v2, v2, v3
264; CHECK-NEXT:    blr
265;
266; NOVSX-LABEL: fsub_v2f64:
267; NOVSX:       # %bb.0:
268; NOVSX-NEXT:    fsub f2, f2, f4
269; NOVSX-NEXT:    fsub f1, f1, f3
270; NOVSX-NEXT:    blr
271;
272; SPE-LABEL: fsub_v2f64:
273; SPE:       # %bb.0:
274; SPE-NEXT:    evldd r4, 8(r1)
275; SPE-NEXT:    evmergelo r7, r7, r8
276; SPE-NEXT:    evmergelo r8, r9, r10
277; SPE-NEXT:    li r9, 8
278; SPE-NEXT:    evmergelo r5, r5, r6
279; SPE-NEXT:    efdsub r4, r7, r4
280; SPE-NEXT:    evstddx r4, r3, r9
281; SPE-NEXT:    efdsub r4, r5, r8
282; SPE-NEXT:    evstdd r4, 0(r3)
283; SPE-NEXT:    blr
284  %res = call <2 x double> @llvm.experimental.constrained.fsub.v2f64(
285                        <2 x double> %vf1, <2 x double> %vf2,
286                        metadata !"round.dynamic",
287                        metadata !"fpexcept.strict") #0
288  ret <2 x double> %res;
289}
290
291define float @fmul_f32(float %f1, float %f2) #0 {
292; CHECK-LABEL: fmul_f32:
293; CHECK:       # %bb.0:
294; CHECK-NEXT:    xsmulsp f1, f1, f2
295; CHECK-NEXT:    blr
296;
297; NOVSX-LABEL: fmul_f32:
298; NOVSX:       # %bb.0:
299; NOVSX-NEXT:    fmuls f1, f1, f2
300; NOVSX-NEXT:    blr
301;
302; SPE-LABEL: fmul_f32:
303; SPE:       # %bb.0:
304; SPE-NEXT:    efsmul r3, r3, r4
305; SPE-NEXT:    blr
306
307  %res = call float @llvm.experimental.constrained.fmul.f32(
308                        float %f1, float %f2,
309                        metadata !"round.dynamic",
310                        metadata !"fpexcept.strict") #0
311  ret float %res;
312}
313
314define double @fmul_f64(double %f1, double %f2) #0 {
315; CHECK-LABEL: fmul_f64:
316; CHECK:       # %bb.0:
317; CHECK-NEXT:    xsmuldp f1, f1, f2
318; CHECK-NEXT:    blr
319;
320; NOVSX-LABEL: fmul_f64:
321; NOVSX:       # %bb.0:
322; NOVSX-NEXT:    fmul f1, f1, f2
323; NOVSX-NEXT:    blr
324;
325; SPE-LABEL: fmul_f64:
326; SPE:       # %bb.0:
327; SPE-NEXT:    evmergelo r5, r5, r6
328; SPE-NEXT:    evmergelo r3, r3, r4
329; SPE-NEXT:    efdmul r4, r3, r5
330; SPE-NEXT:    evmergehi r3, r4, r4
331; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
332; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
333; SPE-NEXT:    blr
334
335  %res = call double @llvm.experimental.constrained.fmul.f64(
336                        double %f1, double %f2,
337                        metadata !"round.dynamic",
338                        metadata !"fpexcept.strict") #0
339  ret double %res;
340}
341
342define <4 x float> @fmul_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
343; CHECK-LABEL: fmul_v4f32:
344; CHECK:       # %bb.0:
345; CHECK-NEXT:    xvmulsp v2, v2, v3
346; CHECK-NEXT:    blr
347;
348; NOVSX-LABEL: fmul_v4f32:
349; NOVSX:       # %bb.0:
350; NOVSX-NEXT:    addi r3, r1, -32
351; NOVSX-NEXT:    addi r4, r1, -48
352; NOVSX-NEXT:    stvx v3, 0, r3
353; NOVSX-NEXT:    stvx v2, 0, r4
354; NOVSX-NEXT:    addi r3, r1, -16
355; NOVSX-NEXT:    lfs f0, -20(r1)
356; NOVSX-NEXT:    lfs f1, -36(r1)
357; NOVSX-NEXT:    fmuls f0, f1, f0
358; NOVSX-NEXT:    lfs f1, -40(r1)
359; NOVSX-NEXT:    stfs f0, -4(r1)
360; NOVSX-NEXT:    lfs f0, -24(r1)
361; NOVSX-NEXT:    fmuls f0, f1, f0
362; NOVSX-NEXT:    lfs f1, -44(r1)
363; NOVSX-NEXT:    stfs f0, -8(r1)
364; NOVSX-NEXT:    lfs f0, -28(r1)
365; NOVSX-NEXT:    fmuls f0, f1, f0
366; NOVSX-NEXT:    lfs f1, -48(r1)
367; NOVSX-NEXT:    stfs f0, -12(r1)
368; NOVSX-NEXT:    lfs f0, -32(r1)
369; NOVSX-NEXT:    fmuls f0, f1, f0
370; NOVSX-NEXT:    stfs f0, -16(r1)
371; NOVSX-NEXT:    lvx v2, 0, r3
372; NOVSX-NEXT:    blr
373;
374; SPE-LABEL: fmul_v4f32:
375; SPE:       # %bb.0:
376; SPE-NEXT:    efsmul r6, r6, r10
377; SPE-NEXT:    efsmul r5, r5, r9
378; SPE-NEXT:    efsmul r4, r4, r8
379; SPE-NEXT:    efsmul r3, r3, r7
380; SPE-NEXT:    blr
381  %res = call <4 x float> @llvm.experimental.constrained.fmul.v4f32(
382                        <4 x float> %vf1, <4 x float> %vf2,
383                        metadata !"round.dynamic",
384                        metadata !"fpexcept.strict") #0
385  ret <4 x float> %res;
386}
387
388define <2 x double> @fmul_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
389; CHECK-LABEL: fmul_v2f64:
390; CHECK:       # %bb.0:
391; CHECK-NEXT:    xvmuldp v2, v2, v3
392; CHECK-NEXT:    blr
393;
394; NOVSX-LABEL: fmul_v2f64:
395; NOVSX:       # %bb.0:
396; NOVSX-NEXT:    fmul f2, f2, f4
397; NOVSX-NEXT:    fmul f1, f1, f3
398; NOVSX-NEXT:    blr
399;
400; SPE-LABEL: fmul_v2f64:
401; SPE:       # %bb.0:
402; SPE-NEXT:    evldd r4, 8(r1)
403; SPE-NEXT:    evmergelo r7, r7, r8
404; SPE-NEXT:    evmergelo r8, r9, r10
405; SPE-NEXT:    li r9, 8
406; SPE-NEXT:    evmergelo r5, r5, r6
407; SPE-NEXT:    efdmul r4, r7, r4
408; SPE-NEXT:    evstddx r4, r3, r9
409; SPE-NEXT:    efdmul r4, r5, r8
410; SPE-NEXT:    evstdd r4, 0(r3)
411; SPE-NEXT:    blr
412  %res = call <2 x double> @llvm.experimental.constrained.fmul.v2f64(
413                        <2 x double> %vf1, <2 x double> %vf2,
414                        metadata !"round.dynamic",
415                        metadata !"fpexcept.strict") #0
416  ret <2 x double> %res;
417}
418
419define float @fdiv_f32(float %f1, float %f2) #0 {
420; CHECK-LABEL: fdiv_f32:
421; CHECK:       # %bb.0:
422; CHECK-NEXT:    xsdivsp f1, f1, f2
423; CHECK-NEXT:    blr
424;
425; NOVSX-LABEL: fdiv_f32:
426; NOVSX:       # %bb.0:
427; NOVSX-NEXT:    fdivs f1, f1, f2
428; NOVSX-NEXT:    blr
429;
430; SPE-LABEL: fdiv_f32:
431; SPE:       # %bb.0:
432; SPE-NEXT:    efsdiv r3, r3, r4
433; SPE-NEXT:    blr
434
435  %res = call float @llvm.experimental.constrained.fdiv.f32(
436                        float %f1, float %f2,
437                        metadata !"round.dynamic",
438                        metadata !"fpexcept.strict") #0
439  ret float %res;
440}
441
442define double @fdiv_f64(double %f1, double %f2) #0 {
443; CHECK-LABEL: fdiv_f64:
444; CHECK:       # %bb.0:
445; CHECK-NEXT:    xsdivdp f1, f1, f2
446; CHECK-NEXT:    blr
447;
448; NOVSX-LABEL: fdiv_f64:
449; NOVSX:       # %bb.0:
450; NOVSX-NEXT:    fdiv f1, f1, f2
451; NOVSX-NEXT:    blr
452;
453; SPE-LABEL: fdiv_f64:
454; SPE:       # %bb.0:
455; SPE-NEXT:    evmergelo r5, r5, r6
456; SPE-NEXT:    evmergelo r3, r3, r4
457; SPE-NEXT:    efddiv r4, r3, r5
458; SPE-NEXT:    evmergehi r3, r4, r4
459; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
460; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
461; SPE-NEXT:    blr
462
463  %res = call double @llvm.experimental.constrained.fdiv.f64(
464                        double %f1, double %f2,
465                        metadata !"round.dynamic",
466                        metadata !"fpexcept.strict") #0
467  ret double %res;
468}
469
470define <4 x float> @fdiv_v4f32(<4 x float> %vf1, <4 x float> %vf2) #0 {
471; CHECK-LABEL: fdiv_v4f32:
472; CHECK:       # %bb.0:
473; CHECK-NEXT:    xvdivsp v2, v2, v3
474; CHECK-NEXT:    blr
475;
476; NOVSX-LABEL: fdiv_v4f32:
477; NOVSX:       # %bb.0:
478; NOVSX-NEXT:    addi r3, r1, -32
479; NOVSX-NEXT:    addi r4, r1, -48
480; NOVSX-NEXT:    stvx v3, 0, r3
481; NOVSX-NEXT:    stvx v2, 0, r4
482; NOVSX-NEXT:    addi r3, r1, -16
483; NOVSX-NEXT:    lfs f0, -20(r1)
484; NOVSX-NEXT:    lfs f1, -36(r1)
485; NOVSX-NEXT:    fdivs f0, f1, f0
486; NOVSX-NEXT:    lfs f1, -40(r1)
487; NOVSX-NEXT:    stfs f0, -4(r1)
488; NOVSX-NEXT:    lfs f0, -24(r1)
489; NOVSX-NEXT:    fdivs f0, f1, f0
490; NOVSX-NEXT:    lfs f1, -44(r1)
491; NOVSX-NEXT:    stfs f0, -8(r1)
492; NOVSX-NEXT:    lfs f0, -28(r1)
493; NOVSX-NEXT:    fdivs f0, f1, f0
494; NOVSX-NEXT:    lfs f1, -48(r1)
495; NOVSX-NEXT:    stfs f0, -12(r1)
496; NOVSX-NEXT:    lfs f0, -32(r1)
497; NOVSX-NEXT:    fdivs f0, f1, f0
498; NOVSX-NEXT:    stfs f0, -16(r1)
499; NOVSX-NEXT:    lvx v2, 0, r3
500; NOVSX-NEXT:    blr
501;
502; SPE-LABEL: fdiv_v4f32:
503; SPE:       # %bb.0:
504; SPE-NEXT:    efsdiv r6, r6, r10
505; SPE-NEXT:    efsdiv r5, r5, r9
506; SPE-NEXT:    efsdiv r4, r4, r8
507; SPE-NEXT:    efsdiv r3, r3, r7
508; SPE-NEXT:    blr
509  %res = call <4 x float> @llvm.experimental.constrained.fdiv.v4f32(
510                        <4 x float> %vf1, <4 x float> %vf2,
511                        metadata !"round.dynamic",
512                        metadata !"fpexcept.strict") #0
513  ret <4 x float> %res
514}
515
516define <2 x double> @fdiv_v2f64(<2 x double> %vf1, <2 x double> %vf2) #0 {
517; CHECK-LABEL: fdiv_v2f64:
518; CHECK:       # %bb.0:
519; CHECK-NEXT:    xvdivdp v2, v2, v3
520; CHECK-NEXT:    blr
521;
522; NOVSX-LABEL: fdiv_v2f64:
523; NOVSX:       # %bb.0:
524; NOVSX-NEXT:    fdiv f2, f2, f4
525; NOVSX-NEXT:    fdiv f1, f1, f3
526; NOVSX-NEXT:    blr
527;
528; SPE-LABEL: fdiv_v2f64:
529; SPE:       # %bb.0:
530; SPE-NEXT:    evldd r4, 8(r1)
531; SPE-NEXT:    evmergelo r7, r7, r8
532; SPE-NEXT:    evmergelo r8, r9, r10
533; SPE-NEXT:    evmergelo r5, r5, r6
534; SPE-NEXT:    efddiv r4, r7, r4
535; SPE-NEXT:    li r7, 8
536; SPE-NEXT:    evstddx r4, r3, r7
537; SPE-NEXT:    efddiv r4, r5, r8
538; SPE-NEXT:    evstdd r4, 0(r3)
539; SPE-NEXT:    blr
540  %res = call <2 x double> @llvm.experimental.constrained.fdiv.v2f64(
541                        <2 x double> %vf1, <2 x double> %vf2,
542                        metadata !"round.dynamic",
543                        metadata !"fpexcept.strict") #0
544  ret <2 x double> %res
545}
546
547define double @no_fma_fold(double %f1, double %f2, double %f3) #0 {
548; CHECK-LABEL: no_fma_fold:
549; CHECK:       # %bb.0:
550; CHECK-NEXT:    xsmuldp f0, f1, f2
551; CHECK-NEXT:    xsadddp f1, f0, f3
552; CHECK-NEXT:    blr
553;
554; NOVSX-LABEL: no_fma_fold:
555; NOVSX:       # %bb.0:
556; NOVSX-NEXT:    fmul f0, f1, f2
557; NOVSX-NEXT:    fadd f1, f0, f3
558; NOVSX-NEXT:    blr
559;
560; SPE-LABEL: no_fma_fold:
561; SPE:       # %bb.0:
562; SPE-NEXT:    evmergelo r7, r7, r8
563; SPE-NEXT:    evmergelo r5, r5, r6
564; SPE-NEXT:    evmergelo r3, r3, r4
565; SPE-NEXT:    efdmul r3, r3, r5
566; SPE-NEXT:    efdadd r4, r3, r7
567; SPE-NEXT:    evmergehi r3, r4, r4
568; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
569; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
570; SPE-NEXT:    blr
571  %mul = call double @llvm.experimental.constrained.fmul.f64(
572                        double %f1, double %f2,
573                        metadata !"round.dynamic",
574                        metadata !"fpexcept.strict") #0
575  %add = call double @llvm.experimental.constrained.fadd.f64(
576                        double %mul, double %f3,
577                        metadata !"round.dynamic",
578                        metadata !"fpexcept.strict") #0
579  ret double %add
580}
581
582define float @fmadd_f32(float %f0, float %f1, float %f2) #0 {
583; CHECK-LABEL: fmadd_f32:
584; CHECK:       # %bb.0:
585; CHECK-NEXT:    xsmaddasp f3, f1, f2
586; CHECK-NEXT:    fmr f1, f3
587; CHECK-NEXT:    blr
588;
589; NOVSX-LABEL: fmadd_f32:
590; NOVSX:       # %bb.0:
591; NOVSX-NEXT:    fmadds f1, f1, f2, f3
592; NOVSX-NEXT:    blr
593;
594; SPE-LABEL: fmadd_f32:
595; SPE:       # %bb.0:
596; SPE-NEXT:    mflr r0
597; SPE-NEXT:    stw r0, 4(r1)
598; SPE-NEXT:    stwu r1, -16(r1)
599; SPE-NEXT:    .cfi_def_cfa_offset 16
600; SPE-NEXT:    .cfi_offset lr, 4
601; SPE-NEXT:    bl fmaf
602; SPE-NEXT:    lwz r0, 20(r1)
603; SPE-NEXT:    addi r1, r1, 16
604; SPE-NEXT:    mtlr r0
605; SPE-NEXT:    blr
606  %res = call float @llvm.experimental.constrained.fma.f32(
607                        float %f0, float %f1, float %f2,
608                        metadata !"round.dynamic",
609                        metadata !"fpexcept.strict") #0
610  ret float %res
611}
612
613define double @fmadd_f64(double %f0, double %f1, double %f2) #0 {
614; CHECK-LABEL: fmadd_f64:
615; CHECK:       # %bb.0:
616; CHECK-NEXT:    xsmaddadp f3, f1, f2
617; CHECK-NEXT:    fmr f1, f3
618; CHECK-NEXT:    blr
619;
620; NOVSX-LABEL: fmadd_f64:
621; NOVSX:       # %bb.0:
622; NOVSX-NEXT:    fmadd f1, f1, f2, f3
623; NOVSX-NEXT:    blr
624;
625; SPE-LABEL: fmadd_f64:
626; SPE:       # %bb.0:
627; SPE-NEXT:    mflr r0
628; SPE-NEXT:    stw r0, 4(r1)
629; SPE-NEXT:    stwu r1, -16(r1)
630; SPE-NEXT:    .cfi_def_cfa_offset 16
631; SPE-NEXT:    .cfi_offset lr, 4
632; SPE-NEXT:    evmergelo r8, r7, r8
633; SPE-NEXT:    evmergelo r6, r5, r6
634; SPE-NEXT:    evmergelo r4, r3, r4
635; SPE-NEXT:    evmergehi r3, r4, r4
636; SPE-NEXT:    evmergehi r5, r6, r6
637; SPE-NEXT:    evmergehi r7, r8, r8
638; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
639; SPE-NEXT:    # kill: def $r6 killed $r6 killed $s6
640; SPE-NEXT:    # kill: def $r8 killed $r8 killed $s8
641; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
642; SPE-NEXT:    # kill: def $r5 killed $r5 killed $s5
643; SPE-NEXT:    # kill: def $r7 killed $r7 killed $s7
644; SPE-NEXT:    bl fma
645; SPE-NEXT:    evmergelo r4, r3, r4
646; SPE-NEXT:    evmergehi r3, r4, r4
647; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
648; SPE-NEXT:    lwz r0, 20(r1)
649; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
650; SPE-NEXT:    addi r1, r1, 16
651; SPE-NEXT:    mtlr r0
652; SPE-NEXT:    blr
653  %res = call double @llvm.experimental.constrained.fma.f64(
654                        double %f0, double %f1, double %f2,
655                        metadata !"round.dynamic",
656                        metadata !"fpexcept.strict") #0
657  ret double %res
658}
659
660define <4 x float> @fmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
661; CHECK-LABEL: fmadd_v4f32:
662; CHECK:       # %bb.0:
663; CHECK-NEXT:    xvmaddasp v4, v2, v3
664; CHECK-NEXT:    vmr v2, v4
665; CHECK-NEXT:    blr
666;
667; NOVSX-LABEL: fmadd_v4f32:
668; NOVSX:       # %bb.0:
669; NOVSX-NEXT:    addi r3, r1, -32
670; NOVSX-NEXT:    addi r4, r1, -48
671; NOVSX-NEXT:    stvx v4, 0, r3
672; NOVSX-NEXT:    addi r3, r1, -64
673; NOVSX-NEXT:    stvx v3, 0, r4
674; NOVSX-NEXT:    stvx v2, 0, r3
675; NOVSX-NEXT:    addi r3, r1, -16
676; NOVSX-NEXT:    lfs f0, -20(r1)
677; NOVSX-NEXT:    lfs f1, -36(r1)
678; NOVSX-NEXT:    lfs f2, -52(r1)
679; NOVSX-NEXT:    fmadds f0, f2, f1, f0
680; NOVSX-NEXT:    lfs f1, -40(r1)
681; NOVSX-NEXT:    lfs f2, -56(r1)
682; NOVSX-NEXT:    stfs f0, -4(r1)
683; NOVSX-NEXT:    lfs f0, -24(r1)
684; NOVSX-NEXT:    fmadds f0, f2, f1, f0
685; NOVSX-NEXT:    lfs f1, -44(r1)
686; NOVSX-NEXT:    lfs f2, -60(r1)
687; NOVSX-NEXT:    stfs f0, -8(r1)
688; NOVSX-NEXT:    lfs f0, -28(r1)
689; NOVSX-NEXT:    fmadds f0, f2, f1, f0
690; NOVSX-NEXT:    lfs f1, -48(r1)
691; NOVSX-NEXT:    lfs f2, -64(r1)
692; NOVSX-NEXT:    stfs f0, -12(r1)
693; NOVSX-NEXT:    lfs f0, -32(r1)
694; NOVSX-NEXT:    fmadds f0, f2, f1, f0
695; NOVSX-NEXT:    stfs f0, -16(r1)
696; NOVSX-NEXT:    lvx v2, 0, r3
697; NOVSX-NEXT:    blr
698;
699; SPE-LABEL: fmadd_v4f32:
700; SPE:       # %bb.0:
701; SPE-NEXT:    mflr r0
702; SPE-NEXT:    stw r0, 4(r1)
703; SPE-NEXT:    stwu r1, -96(r1)
704; SPE-NEXT:    .cfi_def_cfa_offset 96
705; SPE-NEXT:    .cfi_offset lr, 4
706; SPE-NEXT:    .cfi_offset r21, -88
707; SPE-NEXT:    .cfi_offset r22, -80
708; SPE-NEXT:    .cfi_offset r23, -72
709; SPE-NEXT:    .cfi_offset r24, -64
710; SPE-NEXT:    .cfi_offset r25, -56
711; SPE-NEXT:    .cfi_offset r26, -48
712; SPE-NEXT:    .cfi_offset r27, -40
713; SPE-NEXT:    .cfi_offset r28, -32
714; SPE-NEXT:    .cfi_offset r29, -24
715; SPE-NEXT:    .cfi_offset r30, -16
716; SPE-NEXT:    evstdd r27, 56(r1) # 8-byte Folded Spill
717; SPE-NEXT:    mr r27, r5
718; SPE-NEXT:    lwz r5, 116(r1)
719; SPE-NEXT:    evstdd r25, 40(r1) # 8-byte Folded Spill
720; SPE-NEXT:    mr r25, r3
721; SPE-NEXT:    evstdd r26, 48(r1) # 8-byte Folded Spill
722; SPE-NEXT:    mr r26, r4
723; SPE-NEXT:    mr r3, r6
724; SPE-NEXT:    mr r4, r10
725; SPE-NEXT:    evstdd r21, 8(r1) # 8-byte Folded Spill
726; SPE-NEXT:    evstdd r22, 16(r1) # 8-byte Folded Spill
727; SPE-NEXT:    evstdd r23, 24(r1) # 8-byte Folded Spill
728; SPE-NEXT:    evstdd r24, 32(r1) # 8-byte Folded Spill
729; SPE-NEXT:    evstdd r28, 64(r1) # 8-byte Folded Spill
730; SPE-NEXT:    mr r28, r7
731; SPE-NEXT:    evstdd r29, 72(r1) # 8-byte Folded Spill
732; SPE-NEXT:    mr r29, r8
733; SPE-NEXT:    evstdd r30, 80(r1) # 8-byte Folded Spill
734; SPE-NEXT:    mr r30, r9
735; SPE-NEXT:    lwz r24, 104(r1)
736; SPE-NEXT:    lwz r23, 108(r1)
737; SPE-NEXT:    lwz r22, 112(r1)
738; SPE-NEXT:    bl fmaf
739; SPE-NEXT:    mr r21, r3
740; SPE-NEXT:    mr r3, r27
741; SPE-NEXT:    mr r4, r30
742; SPE-NEXT:    mr r5, r22
743; SPE-NEXT:    bl fmaf
744; SPE-NEXT:    mr r30, r3
745; SPE-NEXT:    mr r3, r26
746; SPE-NEXT:    mr r4, r29
747; SPE-NEXT:    mr r5, r23
748; SPE-NEXT:    bl fmaf
749; SPE-NEXT:    mr r29, r3
750; SPE-NEXT:    mr r3, r25
751; SPE-NEXT:    mr r4, r28
752; SPE-NEXT:    mr r5, r24
753; SPE-NEXT:    bl fmaf
754; SPE-NEXT:    mr r4, r29
755; SPE-NEXT:    mr r5, r30
756; SPE-NEXT:    mr r6, r21
757; SPE-NEXT:    evldd r30, 80(r1) # 8-byte Folded Reload
758; SPE-NEXT:    evldd r29, 72(r1) # 8-byte Folded Reload
759; SPE-NEXT:    evldd r28, 64(r1) # 8-byte Folded Reload
760; SPE-NEXT:    evldd r27, 56(r1) # 8-byte Folded Reload
761; SPE-NEXT:    evldd r26, 48(r1) # 8-byte Folded Reload
762; SPE-NEXT:    evldd r25, 40(r1) # 8-byte Folded Reload
763; SPE-NEXT:    evldd r24, 32(r1) # 8-byte Folded Reload
764; SPE-NEXT:    evldd r23, 24(r1) # 8-byte Folded Reload
765; SPE-NEXT:    evldd r22, 16(r1) # 8-byte Folded Reload
766; SPE-NEXT:    evldd r21, 8(r1) # 8-byte Folded Reload
767; SPE-NEXT:    lwz r0, 100(r1)
768; SPE-NEXT:    addi r1, r1, 96
769; SPE-NEXT:    mtlr r0
770; SPE-NEXT:    blr
771  %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
772                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
773                        metadata !"round.dynamic",
774                        metadata !"fpexcept.strict") #0
775  ret <4 x float> %res
776}
777
778define <2 x double> @fmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
779; CHECK-LABEL: fmadd_v2f64:
780; CHECK:       # %bb.0:
781; CHECK-NEXT:    xvmaddadp v4, v2, v3
782; CHECK-NEXT:    vmr v2, v4
783; CHECK-NEXT:    blr
784;
785; NOVSX-LABEL: fmadd_v2f64:
786; NOVSX:       # %bb.0:
787; NOVSX-NEXT:    fmadd f2, f2, f4, f6
788; NOVSX-NEXT:    fmadd f1, f1, f3, f5
789; NOVSX-NEXT:    blr
790;
791; SPE-LABEL: fmadd_v2f64:
792; SPE:       # %bb.0:
793; SPE-NEXT:    mflr r0
794; SPE-NEXT:    stw r0, 4(r1)
795; SPE-NEXT:    stwu r1, -64(r1)
796; SPE-NEXT:    .cfi_def_cfa_offset 64
797; SPE-NEXT:    .cfi_offset lr, 4
798; SPE-NEXT:    .cfi_offset r26, -48
799; SPE-NEXT:    .cfi_offset r27, -40
800; SPE-NEXT:    .cfi_offset r28, -32
801; SPE-NEXT:    .cfi_offset r29, -24
802; SPE-NEXT:    .cfi_offset r30, -16
803; SPE-NEXT:    evstdd r26, 16(r1) # 8-byte Folded Spill
804; SPE-NEXT:    evstdd r27, 24(r1) # 8-byte Folded Spill
805; SPE-NEXT:    evstdd r28, 32(r1) # 8-byte Folded Spill
806; SPE-NEXT:    evstdd r29, 40(r1) # 8-byte Folded Spill
807; SPE-NEXT:    evstdd r30, 48(r1) # 8-byte Folded Spill
808; SPE-NEXT:    evmergelo r27, r7, r8
809; SPE-NEXT:    evmergelo r9, r9, r10
810; SPE-NEXT:    evmergelo r4, r5, r6
811; SPE-NEXT:    mr r30, r3
812; SPE-NEXT:    evldd r8, 80(r1)
813; SPE-NEXT:    evmergehi r3, r4, r4
814; SPE-NEXT:    evmergehi r5, r9, r9
815; SPE-NEXT:    mr r6, r9
816; SPE-NEXT:    evldd r29, 88(r1)
817; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
818; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
819; SPE-NEXT:    # kill: def $r5 killed $r5 killed $s5
820; SPE-NEXT:    evmergehi r7, r8, r8
821; SPE-NEXT:    evldd r28, 72(r1)
822; SPE-NEXT:    # kill: def $r7 killed $r7 killed $s7
823; SPE-NEXT:    # kill: def $r8 killed $r8 killed $s8
824; SPE-NEXT:    bl fma
825; SPE-NEXT:    evmergelo r26, r3, r4
826; SPE-NEXT:    evmergehi r3, r27, r27
827; SPE-NEXT:    evmergehi r5, r28, r28
828; SPE-NEXT:    evmergehi r7, r29, r29
829; SPE-NEXT:    mr r4, r27
830; SPE-NEXT:    mr r6, r28
831; SPE-NEXT:    mr r8, r29
832; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
833; SPE-NEXT:    # kill: def $r5 killed $r5 killed $s5
834; SPE-NEXT:    # kill: def $r7 killed $r7 killed $s7
835; SPE-NEXT:    bl fma
836; SPE-NEXT:    li r5, 8
837; SPE-NEXT:    evmergelo r3, r3, r4
838; SPE-NEXT:    evstddx r3, r30, r5
839; SPE-NEXT:    evstdd r26, 0(r30)
840; SPE-NEXT:    evldd r30, 48(r1) # 8-byte Folded Reload
841; SPE-NEXT:    evldd r29, 40(r1) # 8-byte Folded Reload
842; SPE-NEXT:    evldd r28, 32(r1) # 8-byte Folded Reload
843; SPE-NEXT:    evldd r27, 24(r1) # 8-byte Folded Reload
844; SPE-NEXT:    evldd r26, 16(r1) # 8-byte Folded Reload
845; SPE-NEXT:    lwz r0, 68(r1)
846; SPE-NEXT:    addi r1, r1, 64
847; SPE-NEXT:    mtlr r0
848; SPE-NEXT:    blr
849  %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
850                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
851                        metadata !"round.dynamic",
852                        metadata !"fpexcept.strict") #0
853  ret <2 x double> %res
854}
855
856define float @fmsub_f32(float %f0, float %f1, float %f2) #0 {
857; CHECK-LABEL: fmsub_f32:
858; CHECK:       # %bb.0:
859; CHECK-NEXT:    xsmsubasp f3, f1, f2
860; CHECK-NEXT:    fmr f1, f3
861; CHECK-NEXT:    blr
862;
863; NOVSX-LABEL: fmsub_f32:
864; NOVSX:       # %bb.0:
865; NOVSX-NEXT:    fmsubs f1, f1, f2, f3
866; NOVSX-NEXT:    blr
867;
868; SPE-LABEL: fmsub_f32:
869; SPE:       # %bb.0:
870; SPE-NEXT:    mflr r0
871; SPE-NEXT:    stw r0, 4(r1)
872; SPE-NEXT:    stwu r1, -16(r1)
873; SPE-NEXT:    .cfi_def_cfa_offset 16
874; SPE-NEXT:    .cfi_offset lr, 4
875; SPE-NEXT:    efsneg r5, r5
876; SPE-NEXT:    bl fmaf
877; SPE-NEXT:    lwz r0, 20(r1)
878; SPE-NEXT:    addi r1, r1, 16
879; SPE-NEXT:    mtlr r0
880; SPE-NEXT:    blr
881  %neg = fneg float %f2
882  %res = call float @llvm.experimental.constrained.fma.f32(
883                        float %f0, float %f1, float %neg,
884                        metadata !"round.dynamic",
885                        metadata !"fpexcept.strict") #0
886  ret float %res
887}
888
889define double @fmsub_f64(double %f0, double %f1, double %f2) #0 {
890; CHECK-LABEL: fmsub_f64:
891; CHECK:       # %bb.0:
892; CHECK-NEXT:    xsmsubadp f3, f1, f2
893; CHECK-NEXT:    fmr f1, f3
894; CHECK-NEXT:    blr
895;
896; NOVSX-LABEL: fmsub_f64:
897; NOVSX:       # %bb.0:
898; NOVSX-NEXT:    fmsub f1, f1, f2, f3
899; NOVSX-NEXT:    blr
900;
901; SPE-LABEL: fmsub_f64:
902; SPE:       # %bb.0:
903; SPE-NEXT:    mflr r0
904; SPE-NEXT:    stw r0, 4(r1)
905; SPE-NEXT:    stwu r1, -16(r1)
906; SPE-NEXT:    .cfi_def_cfa_offset 16
907; SPE-NEXT:    .cfi_offset lr, 4
908; SPE-NEXT:    evmergelo r6, r5, r6
909; SPE-NEXT:    evmergelo r4, r3, r4
910; SPE-NEXT:    evmergelo r3, r7, r8
911; SPE-NEXT:    efdneg r8, r3
912; SPE-NEXT:    evmergehi r3, r4, r4
913; SPE-NEXT:    evmergehi r5, r6, r6
914; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
915; SPE-NEXT:    # kill: def $r6 killed $r6 killed $s6
916; SPE-NEXT:    evmergehi r7, r8, r8
917; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
918; SPE-NEXT:    # kill: def $r5 killed $r5 killed $s5
919; SPE-NEXT:    # kill: def $r8 killed $r8 killed $s8
920; SPE-NEXT:    # kill: def $r7 killed $r7 killed $s7
921; SPE-NEXT:    bl fma
922; SPE-NEXT:    evmergelo r4, r3, r4
923; SPE-NEXT:    evmergehi r3, r4, r4
924; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
925; SPE-NEXT:    lwz r0, 20(r1)
926; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
927; SPE-NEXT:    addi r1, r1, 16
928; SPE-NEXT:    mtlr r0
929; SPE-NEXT:    blr
930  %neg = fneg double %f2
931  %res = call double @llvm.experimental.constrained.fma.f64(
932                        double %f0, double %f1, double %neg,
933                        metadata !"round.dynamic",
934                        metadata !"fpexcept.strict") #0
935  ret double %res
936}
937
938define <4 x float> @fmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
939; CHECK-LABEL: fmsub_v4f32:
940; CHECK:       # %bb.0:
941; CHECK-NEXT:    xvmsubasp v4, v2, v3
942; CHECK-NEXT:    vmr v2, v4
943; CHECK-NEXT:    blr
944;
945; NOVSX-LABEL: fmsub_v4f32:
946; NOVSX:       # %bb.0:
947; NOVSX-NEXT:    vspltisb v5, -1
948; NOVSX-NEXT:    addi r3, r1, -48
949; NOVSX-NEXT:    addi r4, r1, -64
950; NOVSX-NEXT:    stvx v3, 0, r3
951; NOVSX-NEXT:    addi r3, r1, -32
952; NOVSX-NEXT:    stvx v2, 0, r4
953; NOVSX-NEXT:    vslw v5, v5, v5
954; NOVSX-NEXT:    vsubfp v4, v5, v4
955; NOVSX-NEXT:    stvx v4, 0, r3
956; NOVSX-NEXT:    addi r3, r1, -16
957; NOVSX-NEXT:    lfs f0, -36(r1)
958; NOVSX-NEXT:    lfs f1, -52(r1)
959; NOVSX-NEXT:    lfs f2, -20(r1)
960; NOVSX-NEXT:    fmadds f0, f1, f0, f2
961; NOVSX-NEXT:    lfs f1, -56(r1)
962; NOVSX-NEXT:    lfs f2, -24(r1)
963; NOVSX-NEXT:    stfs f0, -4(r1)
964; NOVSX-NEXT:    lfs f0, -40(r1)
965; NOVSX-NEXT:    fmadds f0, f1, f0, f2
966; NOVSX-NEXT:    lfs f1, -60(r1)
967; NOVSX-NEXT:    lfs f2, -28(r1)
968; NOVSX-NEXT:    stfs f0, -8(r1)
969; NOVSX-NEXT:    lfs f0, -44(r1)
970; NOVSX-NEXT:    fmadds f0, f1, f0, f2
971; NOVSX-NEXT:    lfs f1, -64(r1)
972; NOVSX-NEXT:    lfs f2, -32(r1)
973; NOVSX-NEXT:    stfs f0, -12(r1)
974; NOVSX-NEXT:    lfs f0, -48(r1)
975; NOVSX-NEXT:    fmadds f0, f1, f0, f2
976; NOVSX-NEXT:    stfs f0, -16(r1)
977; NOVSX-NEXT:    lvx v2, 0, r3
978; NOVSX-NEXT:    blr
979;
980; SPE-LABEL: fmsub_v4f32:
981; SPE:       # %bb.0:
982; SPE-NEXT:    mflr r0
983; SPE-NEXT:    stw r0, 4(r1)
984; SPE-NEXT:    stwu r1, -96(r1)
985; SPE-NEXT:    .cfi_def_cfa_offset 96
986; SPE-NEXT:    .cfi_offset lr, 4
987; SPE-NEXT:    .cfi_offset r21, -88
988; SPE-NEXT:    .cfi_offset r22, -80
989; SPE-NEXT:    .cfi_offset r23, -72
990; SPE-NEXT:    .cfi_offset r24, -64
991; SPE-NEXT:    .cfi_offset r25, -56
992; SPE-NEXT:    .cfi_offset r26, -48
993; SPE-NEXT:    .cfi_offset r27, -40
994; SPE-NEXT:    .cfi_offset r28, -32
995; SPE-NEXT:    .cfi_offset r29, -24
996; SPE-NEXT:    .cfi_offset r30, -16
997; SPE-NEXT:    evstdd r25, 40(r1) # 8-byte Folded Spill
998; SPE-NEXT:    mr r25, r3
999; SPE-NEXT:    evstdd r26, 48(r1) # 8-byte Folded Spill
1000; SPE-NEXT:    mr r26, r4
1001; SPE-NEXT:    evstdd r27, 56(r1) # 8-byte Folded Spill
1002; SPE-NEXT:    mr r27, r5
1003; SPE-NEXT:    evstdd r28, 64(r1) # 8-byte Folded Spill
1004; SPE-NEXT:    mr r28, r7
1005; SPE-NEXT:    lwz r3, 112(r1)
1006; SPE-NEXT:    lwz r4, 104(r1)
1007; SPE-NEXT:    lwz r5, 108(r1)
1008; SPE-NEXT:    lwz r7, 116(r1)
1009; SPE-NEXT:    evstdd r22, 16(r1) # 8-byte Folded Spill
1010; SPE-NEXT:    efsneg r22, r3
1011; SPE-NEXT:    evstdd r23, 24(r1) # 8-byte Folded Spill
1012; SPE-NEXT:    efsneg r23, r5
1013; SPE-NEXT:    evstdd r24, 32(r1) # 8-byte Folded Spill
1014; SPE-NEXT:    efsneg r24, r4
1015; SPE-NEXT:    efsneg r5, r7
1016; SPE-NEXT:    mr r3, r6
1017; SPE-NEXT:    mr r4, r10
1018; SPE-NEXT:    evstdd r21, 8(r1) # 8-byte Folded Spill
1019; SPE-NEXT:    evstdd r29, 72(r1) # 8-byte Folded Spill
1020; SPE-NEXT:    mr r29, r8
1021; SPE-NEXT:    evstdd r30, 80(r1) # 8-byte Folded Spill
1022; SPE-NEXT:    mr r30, r9
1023; SPE-NEXT:    bl fmaf
1024; SPE-NEXT:    mr r21, r3
1025; SPE-NEXT:    mr r3, r27
1026; SPE-NEXT:    mr r4, r30
1027; SPE-NEXT:    mr r5, r22
1028; SPE-NEXT:    bl fmaf
1029; SPE-NEXT:    mr r30, r3
1030; SPE-NEXT:    mr r3, r26
1031; SPE-NEXT:    mr r4, r29
1032; SPE-NEXT:    mr r5, r23
1033; SPE-NEXT:    bl fmaf
1034; SPE-NEXT:    mr r29, r3
1035; SPE-NEXT:    mr r3, r25
1036; SPE-NEXT:    mr r4, r28
1037; SPE-NEXT:    mr r5, r24
1038; SPE-NEXT:    bl fmaf
1039; SPE-NEXT:    mr r4, r29
1040; SPE-NEXT:    mr r5, r30
1041; SPE-NEXT:    mr r6, r21
1042; SPE-NEXT:    evldd r30, 80(r1) # 8-byte Folded Reload
1043; SPE-NEXT:    evldd r29, 72(r1) # 8-byte Folded Reload
1044; SPE-NEXT:    evldd r28, 64(r1) # 8-byte Folded Reload
1045; SPE-NEXT:    evldd r27, 56(r1) # 8-byte Folded Reload
1046; SPE-NEXT:    evldd r26, 48(r1) # 8-byte Folded Reload
1047; SPE-NEXT:    evldd r25, 40(r1) # 8-byte Folded Reload
1048; SPE-NEXT:    evldd r24, 32(r1) # 8-byte Folded Reload
1049; SPE-NEXT:    evldd r23, 24(r1) # 8-byte Folded Reload
1050; SPE-NEXT:    evldd r22, 16(r1) # 8-byte Folded Reload
1051; SPE-NEXT:    evldd r21, 8(r1) # 8-byte Folded Reload
1052; SPE-NEXT:    lwz r0, 100(r1)
1053; SPE-NEXT:    addi r1, r1, 96
1054; SPE-NEXT:    mtlr r0
1055; SPE-NEXT:    blr
1056  %neg = fneg <4 x float> %vf2
1057  %res = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
1058                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
1059                        metadata !"round.dynamic",
1060                        metadata !"fpexcept.strict") #0
1061  ret <4 x float> %res
1062}
1063
1064define <2 x double> @fmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
1065; CHECK-LABEL: fmsub_v2f64:
1066; CHECK:       # %bb.0:
1067; CHECK-NEXT:    xvmsubadp v4, v2, v3
1068; CHECK-NEXT:    vmr v2, v4
1069; CHECK-NEXT:    blr
1070;
1071; NOVSX-LABEL: fmsub_v2f64:
1072; NOVSX:       # %bb.0:
1073; NOVSX-NEXT:    fmsub f2, f2, f4, f6
1074; NOVSX-NEXT:    fmsub f1, f1, f3, f5
1075; NOVSX-NEXT:    blr
1076;
1077; SPE-LABEL: fmsub_v2f64:
1078; SPE:       # %bb.0:
1079; SPE-NEXT:    mflr r0
1080; SPE-NEXT:    stw r0, 4(r1)
1081; SPE-NEXT:    stwu r1, -64(r1)
1082; SPE-NEXT:    .cfi_def_cfa_offset 64
1083; SPE-NEXT:    .cfi_offset lr, 4
1084; SPE-NEXT:    .cfi_offset r26, -48
1085; SPE-NEXT:    .cfi_offset r27, -40
1086; SPE-NEXT:    .cfi_offset r28, -32
1087; SPE-NEXT:    .cfi_offset r29, -24
1088; SPE-NEXT:    .cfi_offset r30, -16
1089; SPE-NEXT:    evstdd r30, 48(r1) # 8-byte Folded Spill
1090; SPE-NEXT:    mr r30, r3
1091; SPE-NEXT:    evldd r3, 80(r1)
1092; SPE-NEXT:    evldd r11, 88(r1)
1093; SPE-NEXT:    evstdd r26, 16(r1) # 8-byte Folded Spill
1094; SPE-NEXT:    evstdd r27, 24(r1) # 8-byte Folded Spill
1095; SPE-NEXT:    efdneg r27, r11
1096; SPE-NEXT:    evstdd r28, 32(r1) # 8-byte Folded Spill
1097; SPE-NEXT:    evstdd r29, 40(r1) # 8-byte Folded Spill
1098; SPE-NEXT:    evmergelo r29, r7, r8
1099; SPE-NEXT:    evmergelo r9, r9, r10
1100; SPE-NEXT:    evmergelo r4, r5, r6
1101; SPE-NEXT:    efdneg r8, r3
1102; SPE-NEXT:    evmergehi r3, r4, r4
1103; SPE-NEXT:    evmergehi r5, r9, r9
1104; SPE-NEXT:    evmergehi r7, r8, r8
1105; SPE-NEXT:    mr r6, r9
1106; SPE-NEXT:    evldd r28, 72(r1)
1107; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
1108; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
1109; SPE-NEXT:    # kill: def $r5 killed $r5 killed $s5
1110; SPE-NEXT:    # kill: def $r7 killed $r7 killed $s7
1111; SPE-NEXT:    # kill: def $r8 killed $r8 killed $s8
1112; SPE-NEXT:    bl fma
1113; SPE-NEXT:    evmergelo r26, r3, r4
1114; SPE-NEXT:    evmergehi r3, r29, r29
1115; SPE-NEXT:    evmergehi r5, r28, r28
1116; SPE-NEXT:    evmergehi r7, r27, r27
1117; SPE-NEXT:    mr r4, r29
1118; SPE-NEXT:    mr r6, r28
1119; SPE-NEXT:    mr r8, r27
1120; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
1121; SPE-NEXT:    # kill: def $r5 killed $r5 killed $s5
1122; SPE-NEXT:    # kill: def $r7 killed $r7 killed $s7
1123; SPE-NEXT:    bl fma
1124; SPE-NEXT:    li r5, 8
1125; SPE-NEXT:    evmergelo r3, r3, r4
1126; SPE-NEXT:    evstddx r3, r30, r5
1127; SPE-NEXT:    evstdd r26, 0(r30)
1128; SPE-NEXT:    evldd r30, 48(r1) # 8-byte Folded Reload
1129; SPE-NEXT:    evldd r29, 40(r1) # 8-byte Folded Reload
1130; SPE-NEXT:    evldd r28, 32(r1) # 8-byte Folded Reload
1131; SPE-NEXT:    evldd r27, 24(r1) # 8-byte Folded Reload
1132; SPE-NEXT:    evldd r26, 16(r1) # 8-byte Folded Reload
1133; SPE-NEXT:    lwz r0, 68(r1)
1134; SPE-NEXT:    addi r1, r1, 64
1135; SPE-NEXT:    mtlr r0
1136; SPE-NEXT:    blr
1137  %neg = fneg <2 x double> %vf2
1138  %res = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
1139                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
1140                        metadata !"round.dynamic",
1141                        metadata !"fpexcept.strict") #0
1142  ret <2 x double> %res
1143}
1144
1145define float @fnmadd_f32(float %f0, float %f1, float %f2) #0 {
1146; CHECK-LABEL: fnmadd_f32:
1147; CHECK:       # %bb.0:
1148; CHECK-NEXT:    xsnmaddasp f3, f1, f2
1149; CHECK-NEXT:    fmr f1, f3
1150; CHECK-NEXT:    blr
1151;
1152; NOVSX-LABEL: fnmadd_f32:
1153; NOVSX:       # %bb.0:
1154; NOVSX-NEXT:    fnmadds f1, f1, f2, f3
1155; NOVSX-NEXT:    blr
1156;
1157; SPE-LABEL: fnmadd_f32:
1158; SPE:       # %bb.0:
1159; SPE-NEXT:    mflr r0
1160; SPE-NEXT:    stw r0, 4(r1)
1161; SPE-NEXT:    stwu r1, -16(r1)
1162; SPE-NEXT:    .cfi_def_cfa_offset 16
1163; SPE-NEXT:    .cfi_offset lr, 4
1164; SPE-NEXT:    bl fmaf
1165; SPE-NEXT:    efsneg r3, r3
1166; SPE-NEXT:    lwz r0, 20(r1)
1167; SPE-NEXT:    addi r1, r1, 16
1168; SPE-NEXT:    mtlr r0
1169; SPE-NEXT:    blr
1170  %fma = call float @llvm.experimental.constrained.fma.f32(
1171                        float %f0, float %f1, float %f2,
1172                        metadata !"round.dynamic",
1173                        metadata !"fpexcept.strict") #0
1174  %res = fneg float %fma
1175  ret float %res
1176}
1177
1178define double @fnmadd_f64(double %f0, double %f1, double %f2) #0 {
1179; CHECK-LABEL: fnmadd_f64:
1180; CHECK:       # %bb.0:
1181; CHECK-NEXT:    xsnmaddadp f3, f1, f2
1182; CHECK-NEXT:    fmr f1, f3
1183; CHECK-NEXT:    blr
1184;
1185; NOVSX-LABEL: fnmadd_f64:
1186; NOVSX:       # %bb.0:
1187; NOVSX-NEXT:    fnmadd f1, f1, f2, f3
1188; NOVSX-NEXT:    blr
1189;
1190; SPE-LABEL: fnmadd_f64:
1191; SPE:       # %bb.0:
1192; SPE-NEXT:    mflr r0
1193; SPE-NEXT:    stw r0, 4(r1)
1194; SPE-NEXT:    stwu r1, -16(r1)
1195; SPE-NEXT:    .cfi_def_cfa_offset 16
1196; SPE-NEXT:    .cfi_offset lr, 4
1197; SPE-NEXT:    evmergelo r8, r7, r8
1198; SPE-NEXT:    evmergelo r6, r5, r6
1199; SPE-NEXT:    evmergelo r4, r3, r4
1200; SPE-NEXT:    evmergehi r3, r4, r4
1201; SPE-NEXT:    evmergehi r5, r6, r6
1202; SPE-NEXT:    evmergehi r7, r8, r8
1203; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
1204; SPE-NEXT:    # kill: def $r6 killed $r6 killed $s6
1205; SPE-NEXT:    # kill: def $r8 killed $r8 killed $s8
1206; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
1207; SPE-NEXT:    # kill: def $r5 killed $r5 killed $s5
1208; SPE-NEXT:    # kill: def $r7 killed $r7 killed $s7
1209; SPE-NEXT:    bl fma
1210; SPE-NEXT:    evmergelo r3, r3, r4
1211; SPE-NEXT:    efdneg r4, r3
1212; SPE-NEXT:    evmergehi r3, r4, r4
1213; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
1214; SPE-NEXT:    lwz r0, 20(r1)
1215; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
1216; SPE-NEXT:    addi r1, r1, 16
1217; SPE-NEXT:    mtlr r0
1218; SPE-NEXT:    blr
1219  %fma = call double @llvm.experimental.constrained.fma.f64(
1220                        double %f0, double %f1, double %f2,
1221                        metadata !"round.dynamic",
1222                        metadata !"fpexcept.strict") #0
1223  %res = fneg double %fma
1224  ret double %res
1225}
1226
1227define <4 x float> @fnmadd_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
1228; CHECK-LABEL: fnmadd_v4f32:
1229; CHECK:       # %bb.0:
1230; CHECK-NEXT:    xvmaddasp v4, v2, v3
1231; CHECK-NEXT:    xvnegsp v2, v4
1232; CHECK-NEXT:    blr
1233;
1234; NOVSX-LABEL: fnmadd_v4f32:
1235; NOVSX:       # %bb.0:
1236; NOVSX-NEXT:    addi r3, r1, -32
1237; NOVSX-NEXT:    addi r4, r1, -48
1238; NOVSX-NEXT:    stvx v4, 0, r3
1239; NOVSX-NEXT:    addi r3, r1, -64
1240; NOVSX-NEXT:    stvx v3, 0, r4
1241; NOVSX-NEXT:    stvx v2, 0, r3
1242; NOVSX-NEXT:    vspltisb v2, -1
1243; NOVSX-NEXT:    addi r3, r1, -16
1244; NOVSX-NEXT:    lfs f0, -20(r1)
1245; NOVSX-NEXT:    lfs f1, -36(r1)
1246; NOVSX-NEXT:    lfs f2, -52(r1)
1247; NOVSX-NEXT:    vslw v2, v2, v2
1248; NOVSX-NEXT:    fmadds f0, f2, f1, f0
1249; NOVSX-NEXT:    lfs f1, -40(r1)
1250; NOVSX-NEXT:    lfs f2, -56(r1)
1251; NOVSX-NEXT:    stfs f0, -4(r1)
1252; NOVSX-NEXT:    lfs f0, -24(r1)
1253; NOVSX-NEXT:    fmadds f0, f2, f1, f0
1254; NOVSX-NEXT:    lfs f1, -44(r1)
1255; NOVSX-NEXT:    lfs f2, -60(r1)
1256; NOVSX-NEXT:    stfs f0, -8(r1)
1257; NOVSX-NEXT:    lfs f0, -28(r1)
1258; NOVSX-NEXT:    fmadds f0, f2, f1, f0
1259; NOVSX-NEXT:    lfs f1, -48(r1)
1260; NOVSX-NEXT:    lfs f2, -64(r1)
1261; NOVSX-NEXT:    stfs f0, -12(r1)
1262; NOVSX-NEXT:    lfs f0, -32(r1)
1263; NOVSX-NEXT:    fmadds f0, f2, f1, f0
1264; NOVSX-NEXT:    stfs f0, -16(r1)
1265; NOVSX-NEXT:    lvx v3, 0, r3
1266; NOVSX-NEXT:    vsubfp v2, v2, v3
1267; NOVSX-NEXT:    blr
1268;
1269; SPE-LABEL: fnmadd_v4f32:
1270; SPE:       # %bb.0:
1271; SPE-NEXT:    mflr r0
1272; SPE-NEXT:    stw r0, 4(r1)
1273; SPE-NEXT:    stwu r1, -96(r1)
1274; SPE-NEXT:    .cfi_def_cfa_offset 96
1275; SPE-NEXT:    .cfi_offset lr, 4
1276; SPE-NEXT:    .cfi_offset r21, -88
1277; SPE-NEXT:    .cfi_offset r22, -80
1278; SPE-NEXT:    .cfi_offset r23, -72
1279; SPE-NEXT:    .cfi_offset r24, -64
1280; SPE-NEXT:    .cfi_offset r25, -56
1281; SPE-NEXT:    .cfi_offset r26, -48
1282; SPE-NEXT:    .cfi_offset r27, -40
1283; SPE-NEXT:    .cfi_offset r28, -32
1284; SPE-NEXT:    .cfi_offset r29, -24
1285; SPE-NEXT:    .cfi_offset r30, -16
1286; SPE-NEXT:    evstdd r27, 56(r1) # 8-byte Folded Spill
1287; SPE-NEXT:    mr r27, r5
1288; SPE-NEXT:    lwz r5, 116(r1)
1289; SPE-NEXT:    evstdd r25, 40(r1) # 8-byte Folded Spill
1290; SPE-NEXT:    mr r25, r3
1291; SPE-NEXT:    evstdd r26, 48(r1) # 8-byte Folded Spill
1292; SPE-NEXT:    mr r26, r4
1293; SPE-NEXT:    mr r3, r6
1294; SPE-NEXT:    mr r4, r10
1295; SPE-NEXT:    evstdd r21, 8(r1) # 8-byte Folded Spill
1296; SPE-NEXT:    evstdd r22, 16(r1) # 8-byte Folded Spill
1297; SPE-NEXT:    evstdd r23, 24(r1) # 8-byte Folded Spill
1298; SPE-NEXT:    evstdd r24, 32(r1) # 8-byte Folded Spill
1299; SPE-NEXT:    evstdd r28, 64(r1) # 8-byte Folded Spill
1300; SPE-NEXT:    mr r28, r7
1301; SPE-NEXT:    evstdd r29, 72(r1) # 8-byte Folded Spill
1302; SPE-NEXT:    mr r29, r8
1303; SPE-NEXT:    evstdd r30, 80(r1) # 8-byte Folded Spill
1304; SPE-NEXT:    mr r30, r9
1305; SPE-NEXT:    lwz r24, 104(r1)
1306; SPE-NEXT:    lwz r23, 108(r1)
1307; SPE-NEXT:    lwz r22, 112(r1)
1308; SPE-NEXT:    bl fmaf
1309; SPE-NEXT:    mr r21, r3
1310; SPE-NEXT:    mr r3, r27
1311; SPE-NEXT:    mr r4, r30
1312; SPE-NEXT:    mr r5, r22
1313; SPE-NEXT:    bl fmaf
1314; SPE-NEXT:    mr r30, r3
1315; SPE-NEXT:    mr r3, r26
1316; SPE-NEXT:    mr r4, r29
1317; SPE-NEXT:    mr r5, r23
1318; SPE-NEXT:    bl fmaf
1319; SPE-NEXT:    mr r29, r3
1320; SPE-NEXT:    mr r3, r25
1321; SPE-NEXT:    mr r4, r28
1322; SPE-NEXT:    mr r5, r24
1323; SPE-NEXT:    bl fmaf
1324; SPE-NEXT:    efsneg r4, r29
1325; SPE-NEXT:    efsneg r5, r30
1326; SPE-NEXT:    efsneg r3, r3
1327; SPE-NEXT:    efsneg r6, r21
1328; SPE-NEXT:    evldd r30, 80(r1) # 8-byte Folded Reload
1329; SPE-NEXT:    evldd r29, 72(r1) # 8-byte Folded Reload
1330; SPE-NEXT:    evldd r28, 64(r1) # 8-byte Folded Reload
1331; SPE-NEXT:    evldd r27, 56(r1) # 8-byte Folded Reload
1332; SPE-NEXT:    evldd r26, 48(r1) # 8-byte Folded Reload
1333; SPE-NEXT:    evldd r25, 40(r1) # 8-byte Folded Reload
1334; SPE-NEXT:    evldd r24, 32(r1) # 8-byte Folded Reload
1335; SPE-NEXT:    evldd r23, 24(r1) # 8-byte Folded Reload
1336; SPE-NEXT:    evldd r22, 16(r1) # 8-byte Folded Reload
1337; SPE-NEXT:    evldd r21, 8(r1) # 8-byte Folded Reload
1338; SPE-NEXT:    lwz r0, 100(r1)
1339; SPE-NEXT:    addi r1, r1, 96
1340; SPE-NEXT:    mtlr r0
1341; SPE-NEXT:    blr
1342  %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
1343                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2,
1344                        metadata !"round.dynamic",
1345                        metadata !"fpexcept.strict") #0
1346  %res = fneg <4 x float> %fma
1347  ret <4 x float> %res
1348}
1349
1350define <2 x double> @fnmadd_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
1351; CHECK-LABEL: fnmadd_v2f64:
1352; CHECK:       # %bb.0:
1353; CHECK-NEXT:    xvnmaddadp v4, v2, v3
1354; CHECK-NEXT:    vmr v2, v4
1355; CHECK-NEXT:    blr
1356;
1357; NOVSX-LABEL: fnmadd_v2f64:
1358; NOVSX:       # %bb.0:
1359; NOVSX-NEXT:    fnmadd f2, f2, f4, f6
1360; NOVSX-NEXT:    fnmadd f1, f1, f3, f5
1361; NOVSX-NEXT:    blr
1362;
1363; SPE-LABEL: fnmadd_v2f64:
1364; SPE:       # %bb.0:
1365; SPE-NEXT:    mflr r0
1366; SPE-NEXT:    stw r0, 4(r1)
1367; SPE-NEXT:    stwu r1, -64(r1)
1368; SPE-NEXT:    .cfi_def_cfa_offset 64
1369; SPE-NEXT:    .cfi_offset lr, 4
1370; SPE-NEXT:    .cfi_offset r26, -48
1371; SPE-NEXT:    .cfi_offset r27, -40
1372; SPE-NEXT:    .cfi_offset r28, -32
1373; SPE-NEXT:    .cfi_offset r29, -24
1374; SPE-NEXT:    .cfi_offset r30, -16
1375; SPE-NEXT:    evstdd r26, 16(r1) # 8-byte Folded Spill
1376; SPE-NEXT:    evstdd r27, 24(r1) # 8-byte Folded Spill
1377; SPE-NEXT:    evstdd r28, 32(r1) # 8-byte Folded Spill
1378; SPE-NEXT:    evstdd r29, 40(r1) # 8-byte Folded Spill
1379; SPE-NEXT:    evstdd r30, 48(r1) # 8-byte Folded Spill
1380; SPE-NEXT:    evmergelo r27, r7, r8
1381; SPE-NEXT:    evmergelo r9, r9, r10
1382; SPE-NEXT:    evmergelo r4, r5, r6
1383; SPE-NEXT:    mr r30, r3
1384; SPE-NEXT:    evldd r8, 80(r1)
1385; SPE-NEXT:    evmergehi r3, r4, r4
1386; SPE-NEXT:    evmergehi r5, r9, r9
1387; SPE-NEXT:    mr r6, r9
1388; SPE-NEXT:    evldd r29, 88(r1)
1389; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
1390; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
1391; SPE-NEXT:    # kill: def $r5 killed $r5 killed $s5
1392; SPE-NEXT:    evmergehi r7, r8, r8
1393; SPE-NEXT:    evldd r28, 72(r1)
1394; SPE-NEXT:    # kill: def $r7 killed $r7 killed $s7
1395; SPE-NEXT:    # kill: def $r8 killed $r8 killed $s8
1396; SPE-NEXT:    bl fma
1397; SPE-NEXT:    evmergelo r26, r3, r4
1398; SPE-NEXT:    evmergehi r3, r27, r27
1399; SPE-NEXT:    evmergehi r5, r28, r28
1400; SPE-NEXT:    evmergehi r7, r29, r29
1401; SPE-NEXT:    mr r4, r27
1402; SPE-NEXT:    mr r6, r28
1403; SPE-NEXT:    mr r8, r29
1404; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
1405; SPE-NEXT:    # kill: def $r5 killed $r5 killed $s5
1406; SPE-NEXT:    # kill: def $r7 killed $r7 killed $s7
1407; SPE-NEXT:    bl fma
1408; SPE-NEXT:    evmergelo r3, r3, r4
1409; SPE-NEXT:    li r5, 8
1410; SPE-NEXT:    efdneg r3, r3
1411; SPE-NEXT:    evstddx r3, r30, r5
1412; SPE-NEXT:    efdneg r3, r26
1413; SPE-NEXT:    evstdd r3, 0(r30)
1414; SPE-NEXT:    evldd r30, 48(r1) # 8-byte Folded Reload
1415; SPE-NEXT:    evldd r29, 40(r1) # 8-byte Folded Reload
1416; SPE-NEXT:    evldd r28, 32(r1) # 8-byte Folded Reload
1417; SPE-NEXT:    evldd r27, 24(r1) # 8-byte Folded Reload
1418; SPE-NEXT:    evldd r26, 16(r1) # 8-byte Folded Reload
1419; SPE-NEXT:    lwz r0, 68(r1)
1420; SPE-NEXT:    addi r1, r1, 64
1421; SPE-NEXT:    mtlr r0
1422; SPE-NEXT:    blr
1423  %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
1424                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2,
1425                        metadata !"round.dynamic",
1426                        metadata !"fpexcept.strict") #0
1427  %res = fneg <2 x double> %fma
1428  ret <2 x double> %res
1429}
1430
1431define float @fnmsub_f32(float %f0, float %f1, float %f2) #0 {
1432; CHECK-LABEL: fnmsub_f32:
1433; CHECK:       # %bb.0:
1434; CHECK-NEXT:    xsnmsubasp f3, f1, f2
1435; CHECK-NEXT:    fmr f1, f3
1436; CHECK-NEXT:    blr
1437;
1438; NOVSX-LABEL: fnmsub_f32:
1439; NOVSX:       # %bb.0:
1440; NOVSX-NEXT:    fnmsubs f1, f1, f2, f3
1441; NOVSX-NEXT:    blr
1442;
1443; SPE-LABEL: fnmsub_f32:
1444; SPE:       # %bb.0:
1445; SPE-NEXT:    mflr r0
1446; SPE-NEXT:    stw r0, 4(r1)
1447; SPE-NEXT:    stwu r1, -16(r1)
1448; SPE-NEXT:    .cfi_def_cfa_offset 16
1449; SPE-NEXT:    .cfi_offset lr, 4
1450; SPE-NEXT:    efsneg r5, r5
1451; SPE-NEXT:    bl fmaf
1452; SPE-NEXT:    efsneg r3, r3
1453; SPE-NEXT:    lwz r0, 20(r1)
1454; SPE-NEXT:    addi r1, r1, 16
1455; SPE-NEXT:    mtlr r0
1456; SPE-NEXT:    blr
1457  %neg = fneg float %f2
1458  %fma = call float @llvm.experimental.constrained.fma.f32(
1459                        float %f0, float %f1, float %neg,
1460                        metadata !"round.dynamic",
1461                        metadata !"fpexcept.strict") #0
1462  %res = fneg float %fma
1463  ret float %res
1464}
1465
1466define double @fnmsub_f64(double %f0, double %f1, double %f2) #0 {
1467; CHECK-LABEL: fnmsub_f64:
1468; CHECK:       # %bb.0:
1469; CHECK-NEXT:    xsnmsubadp f3, f1, f2
1470; CHECK-NEXT:    fmr f1, f3
1471; CHECK-NEXT:    blr
1472;
1473; NOVSX-LABEL: fnmsub_f64:
1474; NOVSX:       # %bb.0:
1475; NOVSX-NEXT:    fnmsub f1, f1, f2, f3
1476; NOVSX-NEXT:    blr
1477;
1478; SPE-LABEL: fnmsub_f64:
1479; SPE:       # %bb.0:
1480; SPE-NEXT:    mflr r0
1481; SPE-NEXT:    stw r0, 4(r1)
1482; SPE-NEXT:    stwu r1, -16(r1)
1483; SPE-NEXT:    .cfi_def_cfa_offset 16
1484; SPE-NEXT:    .cfi_offset lr, 4
1485; SPE-NEXT:    evmergelo r6, r5, r6
1486; SPE-NEXT:    evmergelo r4, r3, r4
1487; SPE-NEXT:    evmergelo r3, r7, r8
1488; SPE-NEXT:    efdneg r8, r3
1489; SPE-NEXT:    evmergehi r3, r4, r4
1490; SPE-NEXT:    evmergehi r5, r6, r6
1491; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
1492; SPE-NEXT:    # kill: def $r6 killed $r6 killed $s6
1493; SPE-NEXT:    evmergehi r7, r8, r8
1494; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
1495; SPE-NEXT:    # kill: def $r5 killed $r5 killed $s5
1496; SPE-NEXT:    # kill: def $r8 killed $r8 killed $s8
1497; SPE-NEXT:    # kill: def $r7 killed $r7 killed $s7
1498; SPE-NEXT:    bl fma
1499; SPE-NEXT:    evmergelo r3, r3, r4
1500; SPE-NEXT:    efdneg r4, r3
1501; SPE-NEXT:    evmergehi r3, r4, r4
1502; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
1503; SPE-NEXT:    lwz r0, 20(r1)
1504; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
1505; SPE-NEXT:    addi r1, r1, 16
1506; SPE-NEXT:    mtlr r0
1507; SPE-NEXT:    blr
1508  %neg = fneg double %f2
1509  %fma = call double @llvm.experimental.constrained.fma.f64(
1510                        double %f0, double %f1, double %neg,
1511                        metadata !"round.dynamic",
1512                        metadata !"fpexcept.strict") #0
1513  %res = fneg double %fma
1514  ret double %res
1515}
1516
define <4 x float> @fnmsub_v4f32(<4 x float> %vf0, <4 x float> %vf1, <4 x float> %vf2) #0 {
; CHECK-LABEL: fnmsub_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvnmsubasp v4, v2, v3
; CHECK-NEXT:    vmr v2, v4
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fnmsub_v4f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    vspltisb v5, -1
; NOVSX-NEXT:    addi r3, r1, -48
; NOVSX-NEXT:    addi r4, r1, -64
; NOVSX-NEXT:    stvx v3, 0, r3
; NOVSX-NEXT:    addi r3, r1, -32
; NOVSX-NEXT:    stvx v2, 0, r4
; NOVSX-NEXT:    vslw v5, v5, v5
; NOVSX-NEXT:    vsubfp v4, v5, v4
; NOVSX-NEXT:    stvx v4, 0, r3
; NOVSX-NEXT:    addi r3, r1, -16
; NOVSX-NEXT:    lfs f0, -36(r1)
; NOVSX-NEXT:    lfs f1, -52(r1)
; NOVSX-NEXT:    lfs f2, -20(r1)
; NOVSX-NEXT:    fmadds f0, f1, f0, f2
; NOVSX-NEXT:    lfs f1, -56(r1)
; NOVSX-NEXT:    lfs f2, -24(r1)
; NOVSX-NEXT:    stfs f0, -4(r1)
; NOVSX-NEXT:    lfs f0, -40(r1)
; NOVSX-NEXT:    fmadds f0, f1, f0, f2
; NOVSX-NEXT:    lfs f1, -60(r1)
; NOVSX-NEXT:    lfs f2, -28(r1)
; NOVSX-NEXT:    stfs f0, -8(r1)
; NOVSX-NEXT:    lfs f0, -44(r1)
; NOVSX-NEXT:    fmadds f0, f1, f0, f2
; NOVSX-NEXT:    lfs f1, -64(r1)
; NOVSX-NEXT:    lfs f2, -32(r1)
; NOVSX-NEXT:    stfs f0, -12(r1)
; NOVSX-NEXT:    lfs f0, -48(r1)
; NOVSX-NEXT:    fmadds f0, f1, f0, f2
; NOVSX-NEXT:    stfs f0, -16(r1)
; NOVSX-NEXT:    lvx v2, 0, r3
; NOVSX-NEXT:    vsubfp v2, v5, v2
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fnmsub_v4f32:
; SPE:       # %bb.0:
; SPE-NEXT:    mflr r0
; SPE-NEXT:    stw r0, 4(r1)
; SPE-NEXT:    stwu r1, -96(r1)
; SPE-NEXT:    .cfi_def_cfa_offset 96
; SPE-NEXT:    .cfi_offset lr, 4
; SPE-NEXT:    .cfi_offset r21, -88
; SPE-NEXT:    .cfi_offset r22, -80
; SPE-NEXT:    .cfi_offset r23, -72
; SPE-NEXT:    .cfi_offset r24, -64
; SPE-NEXT:    .cfi_offset r25, -56
; SPE-NEXT:    .cfi_offset r26, -48
; SPE-NEXT:    .cfi_offset r27, -40
; SPE-NEXT:    .cfi_offset r28, -32
; SPE-NEXT:    .cfi_offset r29, -24
; SPE-NEXT:    .cfi_offset r30, -16
; SPE-NEXT:    evstdd r25, 40(r1) # 8-byte Folded Spill
; SPE-NEXT:    mr r25, r3
; SPE-NEXT:    evstdd r26, 48(r1) # 8-byte Folded Spill
; SPE-NEXT:    mr r26, r4
; SPE-NEXT:    evstdd r27, 56(r1) # 8-byte Folded Spill
; SPE-NEXT:    mr r27, r5
; SPE-NEXT:    evstdd r28, 64(r1) # 8-byte Folded Spill
; SPE-NEXT:    mr r28, r7
; SPE-NEXT:    lwz r3, 112(r1)
; SPE-NEXT:    lwz r4, 104(r1)
; SPE-NEXT:    lwz r5, 108(r1)
; SPE-NEXT:    lwz r7, 116(r1)
; SPE-NEXT:    evstdd r22, 16(r1) # 8-byte Folded Spill
; SPE-NEXT:    efsneg r22, r3
; SPE-NEXT:    evstdd r23, 24(r1) # 8-byte Folded Spill
; SPE-NEXT:    efsneg r23, r5
; SPE-NEXT:    evstdd r24, 32(r1) # 8-byte Folded Spill
; SPE-NEXT:    efsneg r24, r4
; SPE-NEXT:    efsneg r5, r7
; SPE-NEXT:    mr r3, r6
; SPE-NEXT:    mr r4, r10
; SPE-NEXT:    evstdd r21, 8(r1) # 8-byte Folded Spill
; SPE-NEXT:    evstdd r29, 72(r1) # 8-byte Folded Spill
; SPE-NEXT:    mr r29, r8
; SPE-NEXT:    evstdd r30, 80(r1) # 8-byte Folded Spill
; SPE-NEXT:    mr r30, r9
; SPE-NEXT:    bl fmaf
; SPE-NEXT:    mr r21, r3
; SPE-NEXT:    mr r3, r27
; SPE-NEXT:    mr r4, r30
; SPE-NEXT:    mr r5, r22
; SPE-NEXT:    bl fmaf
; SPE-NEXT:    mr r30, r3
; SPE-NEXT:    mr r3, r26
; SPE-NEXT:    mr r4, r29
; SPE-NEXT:    mr r5, r23
; SPE-NEXT:    bl fmaf
; SPE-NEXT:    mr r29, r3
; SPE-NEXT:    mr r3, r25
; SPE-NEXT:    mr r4, r28
; SPE-NEXT:    mr r5, r24
; SPE-NEXT:    bl fmaf
; SPE-NEXT:    efsneg r4, r29
; SPE-NEXT:    efsneg r5, r30
; SPE-NEXT:    efsneg r3, r3
; SPE-NEXT:    efsneg r6, r21
; SPE-NEXT:    evldd r30, 80(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r29, 72(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r28, 64(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r27, 56(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r26, 48(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r25, 40(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r24, 32(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r23, 24(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r22, 16(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r21, 8(r1) # 8-byte Folded Reload
; SPE-NEXT:    lwz r0, 100(r1)
; SPE-NEXT:    addi r1, r1, 96
; SPE-NEXT:    mtlr r0
; SPE-NEXT:    blr
  %neg = fneg <4 x float> %vf2
  %fma = call <4 x float> @llvm.experimental.constrained.fma.v4f32(
                        <4 x float> %vf0, <4 x float> %vf1, <4 x float> %neg,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  %res = fneg <4 x float> %fma
  ret <4 x float> %res
}

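; Vector fnmsub, v2f64: xvnmsubadp with VSX; without VSX each lane uses scalar
; fnmsub; SPE makes two fma calls.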
define <2 x double> @fnmsub_v2f64(<2 x double> %vf0, <2 x double> %vf1, <2 x double> %vf2) #0 {
; CHECK-LABEL: fnmsub_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvnmsubadp v4, v2, v3
; CHECK-NEXT:    vmr v2, v4
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fnmsub_v2f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fnmsub f2, f2, f4, f6
; NOVSX-NEXT:    fnmsub f1, f1, f3, f5
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fnmsub_v2f64:
; SPE:       # %bb.0:
; SPE-NEXT:    mflr r0
; SPE-NEXT:    stw r0, 4(r1)
; SPE-NEXT:    stwu r1, -64(r1)
; SPE-NEXT:    .cfi_def_cfa_offset 64
; SPE-NEXT:    .cfi_offset lr, 4
; SPE-NEXT:    .cfi_offset r26, -48
; SPE-NEXT:    .cfi_offset r27, -40
; SPE-NEXT:    .cfi_offset r28, -32
; SPE-NEXT:    .cfi_offset r29, -24
; SPE-NEXT:    .cfi_offset r30, -16
; SPE-NEXT:    evstdd r30, 48(r1) # 8-byte Folded Spill
; SPE-NEXT:    mr r30, r3
; SPE-NEXT:    evldd r3, 80(r1)
; SPE-NEXT:    evldd r11, 88(r1)
; SPE-NEXT:    evstdd r26, 16(r1) # 8-byte Folded Spill
; SPE-NEXT:    evstdd r27, 24(r1) # 8-byte Folded Spill
; SPE-NEXT:    efdneg r27, r11
; SPE-NEXT:    evstdd r28, 32(r1) # 8-byte Folded Spill
; SPE-NEXT:    evstdd r29, 40(r1) # 8-byte Folded Spill
; SPE-NEXT:    evmergelo r29, r7, r8
; SPE-NEXT:    evmergelo r9, r9, r10
; SPE-NEXT:    evmergelo r4, r5, r6
; SPE-NEXT:    efdneg r8, r3
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    evmergehi r5, r9, r9
; SPE-NEXT:    evmergehi r7, r8, r8
; SPE-NEXT:    mr r6, r9
; SPE-NEXT:    evldd r28, 72(r1)
; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT:    # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT:    # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT:    # kill: def $r8 killed $r8 killed $s8
; SPE-NEXT:    bl fma
; SPE-NEXT:    evmergelo r26, r3, r4
; SPE-NEXT:    evmergehi r3, r29, r29
; SPE-NEXT:    evmergehi r5, r28, r28
; SPE-NEXT:    evmergehi r7, r27, r27
; SPE-NEXT:    mr r4, r29
; SPE-NEXT:    mr r6, r28
; SPE-NEXT:    mr r8, r27
; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT:    # kill: def $r5 killed $r5 killed $s5
; SPE-NEXT:    # kill: def $r7 killed $r7 killed $s7
; SPE-NEXT:    bl fma
; SPE-NEXT:    evmergelo r3, r3, r4
; SPE-NEXT:    li r5, 8
; SPE-NEXT:    efdneg r3, r3
; SPE-NEXT:    evstddx r3, r30, r5
; SPE-NEXT:    efdneg r3, r26
; SPE-NEXT:    evstdd r3, 0(r30)
; SPE-NEXT:    evldd r30, 48(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r29, 40(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r28, 32(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r27, 24(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r26, 16(r1) # 8-byte Folded Reload
; SPE-NEXT:    lwz r0, 68(r1)
; SPE-NEXT:    addi r1, r1, 64
; SPE-NEXT:    mtlr r0
; SPE-NEXT:    blr
  %neg = fneg <2 x double> %vf2
  %fma = call <2 x double> @llvm.experimental.constrained.fma.v2f64(
                        <2 x double> %vf0, <2 x double> %vf1, <2 x double> %neg,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  %res = fneg <2 x double> %fma
  ret <2 x double> %res
}

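; Constrained sqrt, f32: xssqrtsp with VSX, fsqrts without; SPE has no sqrt
; instruction and calls sqrtf.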
define float @fsqrt_f32(float %f1) #0 {
; CHECK-LABEL: fsqrt_f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xssqrtsp f1, f1
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fsqrt_f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fsqrts f1, f1
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fsqrt_f32:
; SPE:       # %bb.0:
; SPE-NEXT:    mflr r0
; SPE-NEXT:    stw r0, 4(r1)
; SPE-NEXT:    stwu r1, -16(r1)
; SPE-NEXT:    .cfi_def_cfa_offset 16
; SPE-NEXT:    .cfi_offset lr, 4
; SPE-NEXT:    bl sqrtf
; SPE-NEXT:    lwz r0, 20(r1)
; SPE-NEXT:    addi r1, r1, 16
; SPE-NEXT:    mtlr r0
; SPE-NEXT:    blr
  %res = call float @llvm.experimental.constrained.sqrt.f32(
                        float %f1,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret float %res
}

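; Constrained sqrt, f64: xssqrtdp with VSX, fsqrt without; SPE calls sqrt.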
define double @fsqrt_f64(double %f1) #0 {
; CHECK-LABEL: fsqrt_f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xssqrtdp f1, f1
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fsqrt_f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fsqrt f1, f1
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fsqrt_f64:
; SPE:       # %bb.0:
; SPE-NEXT:    mflr r0
; SPE-NEXT:    stw r0, 4(r1)
; SPE-NEXT:    stwu r1, -16(r1)
; SPE-NEXT:    .cfi_def_cfa_offset 16
; SPE-NEXT:    .cfi_offset lr, 4
; SPE-NEXT:    evmergelo r4, r3, r4
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT:    bl sqrt
; SPE-NEXT:    evmergelo r4, r3, r4
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT:    lwz r0, 20(r1)
; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT:    addi r1, r1, 16
; SPE-NEXT:    mtlr r0
; SPE-NEXT:    blr
  %res = call double @llvm.experimental.constrained.sqrt.f64(
                        double %f1,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret double %res
}

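; Constrained sqrt, v4f32: xvsqrtsp with VSX; without VSX the vector is
; scalarized through the stack; SPE makes four sqrtf calls.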
define <4 x float> @fsqrt_v4f32(<4 x float> %vf1) #0 {
; CHECK-LABEL: fsqrt_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvsqrtsp v2, v2
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fsqrt_v4f32:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    addi r3, r1, -32
; NOVSX-NEXT:    stvx v2, 0, r3
; NOVSX-NEXT:    addi r3, r1, -16
; NOVSX-NEXT:    lfs f0, -20(r1)
; NOVSX-NEXT:    fsqrts f0, f0
; NOVSX-NEXT:    stfs f0, -4(r1)
; NOVSX-NEXT:    lfs f0, -24(r1)
; NOVSX-NEXT:    fsqrts f0, f0
; NOVSX-NEXT:    stfs f0, -8(r1)
; NOVSX-NEXT:    lfs f0, -28(r1)
; NOVSX-NEXT:    fsqrts f0, f0
; NOVSX-NEXT:    stfs f0, -12(r1)
; NOVSX-NEXT:    lfs f0, -32(r1)
; NOVSX-NEXT:    fsqrts f0, f0
; NOVSX-NEXT:    stfs f0, -16(r1)
; NOVSX-NEXT:    lvx v2, 0, r3
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fsqrt_v4f32:
; SPE:       # %bb.0:
; SPE-NEXT:    mflr r0
; SPE-NEXT:    stw r0, 4(r1)
; SPE-NEXT:    stwu r1, -48(r1)
; SPE-NEXT:    .cfi_def_cfa_offset 48
; SPE-NEXT:    .cfi_offset lr, 4
; SPE-NEXT:    .cfi_offset r27, -40
; SPE-NEXT:    .cfi_offset r28, -32
; SPE-NEXT:    .cfi_offset r29, -24
; SPE-NEXT:    .cfi_offset r30, -16
; SPE-NEXT:    evstdd r28, 16(r1) # 8-byte Folded Spill
; SPE-NEXT:    mr r28, r3
; SPE-NEXT:    mr r3, r6
; SPE-NEXT:    evstdd r27, 8(r1) # 8-byte Folded Spill
; SPE-NEXT:    evstdd r29, 24(r1) # 8-byte Folded Spill
; SPE-NEXT:    mr r29, r4
; SPE-NEXT:    evstdd r30, 32(r1) # 8-byte Folded Spill
; SPE-NEXT:    mr r30, r5
; SPE-NEXT:    bl sqrtf
; SPE-NEXT:    mr r27, r3
; SPE-NEXT:    mr r3, r30
; SPE-NEXT:    bl sqrtf
; SPE-NEXT:    mr r30, r3
; SPE-NEXT:    mr r3, r29
; SPE-NEXT:    bl sqrtf
; SPE-NEXT:    mr r29, r3
; SPE-NEXT:    mr r3, r28
; SPE-NEXT:    bl sqrtf
; SPE-NEXT:    mr r4, r29
; SPE-NEXT:    mr r5, r30
; SPE-NEXT:    mr r6, r27
; SPE-NEXT:    evldd r30, 32(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r29, 24(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r28, 16(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r27, 8(r1) # 8-byte Folded Reload
; SPE-NEXT:    lwz r0, 52(r1)
; SPE-NEXT:    addi r1, r1, 48
; SPE-NEXT:    mtlr r0
; SPE-NEXT:    blr
  %res = call <4 x float> @llvm.experimental.constrained.sqrt.v4f32(
                        <4 x float> %vf1,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <4 x float> %res
}

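; Constrained sqrt, v2f64: xvsqrtdp with VSX, per-lane fsqrt without; SPE makes
; two sqrt calls.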
define <2 x double> @fsqrt_v2f64(<2 x double> %vf1) #0 {
; CHECK-LABEL: fsqrt_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    xvsqrtdp v2, v2
; CHECK-NEXT:    blr
;
; NOVSX-LABEL: fsqrt_v2f64:
; NOVSX:       # %bb.0:
; NOVSX-NEXT:    fsqrt f2, f2
; NOVSX-NEXT:    fsqrt f1, f1
; NOVSX-NEXT:    blr
;
; SPE-LABEL: fsqrt_v2f64:
; SPE:       # %bb.0:
; SPE-NEXT:    mflr r0
; SPE-NEXT:    stw r0, 4(r1)
; SPE-NEXT:    stwu r1, -48(r1)
; SPE-NEXT:    .cfi_def_cfa_offset 48
; SPE-NEXT:    .cfi_offset lr, 4
; SPE-NEXT:    .cfi_offset r28, -32
; SPE-NEXT:    .cfi_offset r29, -24
; SPE-NEXT:    .cfi_offset r30, -16
; SPE-NEXT:    evstdd r28, 16(r1) # 8-byte Folded Spill
; SPE-NEXT:    evstdd r29, 24(r1) # 8-byte Folded Spill
; SPE-NEXT:    evstdd r30, 32(r1) # 8-byte Folded Spill
; SPE-NEXT:    evmergelo r29, r7, r8
; SPE-NEXT:    evmergelo r4, r5, r6
; SPE-NEXT:    mr r30, r3
; SPE-NEXT:    evmergehi r3, r4, r4
; SPE-NEXT:    # kill: def $r4 killed $r4 killed $s4
; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT:    bl sqrt
; SPE-NEXT:    evmergelo r28, r3, r4
; SPE-NEXT:    evmergehi r3, r29, r29
; SPE-NEXT:    mr r4, r29
; SPE-NEXT:    # kill: def $r3 killed $r3 killed $s3
; SPE-NEXT:    bl sqrt
; SPE-NEXT:    li r5, 8
; SPE-NEXT:    evmergelo r3, r3, r4
; SPE-NEXT:    evstddx r3, r30, r5
; SPE-NEXT:    evstdd r28, 0(r30)
; SPE-NEXT:    evldd r30, 32(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r29, 24(r1) # 8-byte Folded Reload
; SPE-NEXT:    evldd r28, 16(r1) # 8-byte Folded Reload
; SPE-NEXT:    lwz r0, 52(r1)
; SPE-NEXT:    addi r1, r1, 48
; SPE-NEXT:    mtlr r0
; SPE-NEXT:    blr
  %res = call <2 x double> @llvm.experimental.constrained.sqrt.v2f64(
                        <2 x double> %vf1,
                        metadata !"round.dynamic",
                        metadata !"fpexcept.strict") #0
  ret <2 x double> %res
}

attributes #0 = { strictfp }