1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
3; RUN:   -target-abi=ilp32f | FileCheck -check-prefix=RV32IF %s
4; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
5; RUN:   -target-abi=lp64f | FileCheck -check-prefix=RV64IF %s
6; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
7; RUN:   | FileCheck -check-prefix=RV32I %s
8; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
9; RUN:   | FileCheck -check-prefix=RV64I %s
10
11; These tests are each targeted at a particular RISC-V FPU instruction.
12; Compares and conversions can be found in float-fcmp.ll and float-convert.ll
13; respectively. Some other float-*.ll files in this folder exercise LLVM IR
14; instructions that don't directly match a RISC-V instruction.
15
define float @fadd_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fadd_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fadd.s fa0, fa0, fa1
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fadd_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fadd.s fa0, fa0, fa1
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fadd_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fadd_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; With +f and a hard-float ABI this selects a single fadd.s; without the F
; extension it lowers to the __addsf3 compiler-rt libcall.
  %1 = fadd float %a, %b
  ret float %1
}
47
define float @fsub_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fsub_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fsub.s fa0, fa0, fa1
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fsub_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fsub.s fa0, fa0, fa1
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fsub_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __subsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fsub_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call __subsf3@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; With +f this selects a single fsub.s; soft-float targets call __subsf3.
  %1 = fsub float %a, %b
  ret float %1
}
79
define float @fmul_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fmul_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmul.s fa0, fa0, fa1
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmul_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmul.s fa0, fa0, fa1
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmul_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __mulsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmul_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call __mulsf3@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; With +f this selects a single fmul.s; soft-float targets call __mulsf3.
  %1 = fmul float %a, %b
  ret float %1
}
111
define float @fdiv_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fdiv_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fdiv.s fa0, fa0, fa1
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fdiv_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fdiv.s fa0, fa0, fa1
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fdiv_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __divsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fdiv_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call __divsf3@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; With +f this selects a single fdiv.s; soft-float targets call __divsf3.
  %1 = fdiv float %a, %b
  ret float %1
}
143
144declare float @llvm.sqrt.f32(float)
145
define float @fsqrt_s(float %a) nounwind {
; RV32IF-LABEL: fsqrt_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fsqrt.s fa0, fa0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fsqrt_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fsqrt.s fa0, fa0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fsqrt_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call sqrtf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fsqrt_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call sqrtf@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; llvm.sqrt.f32 selects fsqrt.s with +f; soft-float targets call libm sqrtf.
  %1 = call float @llvm.sqrt.f32(float %a)
  ret float %1
}
177
178declare float @llvm.copysign.f32(float, float)
179
define float @fsgnj_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fsgnj_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fsgnj.s fa0, fa0, fa1
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fsgnj_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fsgnj.s fa0, fa0, fa1
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fsgnj_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lui a2, 524288
; RV32I-NEXT:    and a1, a1, a2
; RV32I-NEXT:    slli a0, a0, 1
; RV32I-NEXT:    srli a0, a0, 1
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fsgnj_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a2, 524288
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    slli a0, a0, 33
; RV64I-NEXT:    srli a0, a0, 33
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    ret
; copysign selects fsgnj.s with +f. The soft-float expansion is branchless
; integer code: mask %b's sign bit (lui 524288 == 0x80000000), clear %a's
; sign bit with a shift-left/shift-right pair, then OR the two together.
  %1 = call float @llvm.copysign.f32(float %a, float %b)
  ret float %1
}
211
212; This function performs extra work to ensure that
213; DAGCombiner::visitBITCAST doesn't replace the fneg with an xor.
define i32 @fneg_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fneg_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fadd.s ft0, fa0, fa0
; RV32IF-NEXT:    fneg.s ft1, ft0
; RV32IF-NEXT:    feq.s a0, ft0, ft1
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fneg_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fadd.s ft0, fa0, fa0
; RV64IF-NEXT:    fneg.s ft1, ft0
; RV64IF-NEXT:    feq.s a0, ft0, ft1
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fneg_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv a1, a0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    xor a1, a0, a1
; RV32I-NEXT:    call __eqsf2@plt
; RV32I-NEXT:    seqz a0, a0
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fneg_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv a1, a0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    xor a1, a0, a1
; RV64I-NEXT:    call __eqsf2@plt
; RV64I-NEXT:    seqz a0, a0
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; The fadd feeding the fneg keeps the value in an FPR so fneg.s is selected
; (soft-float instead negates by XORing the sign bit, 0x80000000).
  %1 = fadd float %a, %a
  %2 = fneg float %1
  %3 = fcmp oeq float %1, %2
  %4 = zext i1 %3 to i32
  ret i32 %4
}
262
263; This function performs extra work to ensure that
264; DAGCombiner::visitBITCAST doesn't replace the fneg with an xor.
define float @fsgnjn_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fsgnjn_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fadd.s ft0, fa0, fa1
; RV32IF-NEXT:    fsgnjn.s fa0, fa0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fsgnjn_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fadd.s ft0, fa0, fa1
; RV64IF-NEXT:    fsgnjn.s fa0, fa0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fsgnjn_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    not a0, a0
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    and a0, a0, a1
; RV32I-NEXT:    slli a1, s0, 1
; RV32I-NEXT:    srli a1, a1, 1
; RV32I-NEXT:    or a0, a1, a0
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fsgnjn_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    not a0, a0
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    slli a1, s0, 33
; RV64I-NEXT:    srli a1, a1, 33
; RV64I-NEXT:    or a0, a1, a0
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; copysign of a negated value folds to a single fsgnjn.s with +f; soft-float
; inverts the sign bit of the sum (not + mask) and merges it with |%a|.
  %1 = fadd float %a, %b
  %2 = fneg float %1
  %3 = call float @llvm.copysign.f32(float %a, float %2)
  ret float %3
}
318
319declare float @llvm.fabs.f32(float)
320
321; This function performs extra work to ensure that
322; DAGCombiner::visitBITCAST doesn't replace the fabs with an and.
define float @fabs_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fabs_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fadd.s ft0, fa0, fa1
; RV32IF-NEXT:    fabs.s ft1, ft0
; RV32IF-NEXT:    fadd.s fa0, ft1, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fabs_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fadd.s ft0, fa0, fa1
; RV64IF-NEXT:    fabs.s ft1, ft0
; RV64IF-NEXT:    fadd.s fa0, ft1, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fabs_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv a1, a0
; RV32I-NEXT:    slli a0, a0, 1
; RV32I-NEXT:    srli a0, a0, 1
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fabs_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv a1, a0
; RV64I-NEXT:    slli a0, a0, 33
; RV64I-NEXT:    srli a0, a0, 33
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; Keeping the value in an FPR (via the fadds) makes ISel pick fabs.s;
; soft-float clears the sign bit with a shift-left/shift-right pair.
  %1 = fadd float %a, %b
  %2 = call float @llvm.fabs.f32(float %1)
  %3 = fadd float %2, %1
  ret float %3
}
368
369declare float @llvm.minnum.f32(float, float)
370
define float @fmin_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fmin_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmin.s fa0, fa0, fa1
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmin_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmin.s fa0, fa0, fa1
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmin_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call fminf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmin_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call fminf@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; llvm.minnum.f32 selects fmin.s with +f; soft-float calls libm fminf.
  %1 = call float @llvm.minnum.f32(float %a, float %b)
  ret float %1
}
402
403declare float @llvm.maxnum.f32(float, float)
404
define float @fmax_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fmax_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmax.s fa0, fa0, fa1
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmax_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmax.s fa0, fa0, fa1
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmax_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call fmaxf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmax_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call fmaxf@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; llvm.maxnum.f32 selects fmax.s with +f; soft-float calls libm fmaxf.
  %1 = call float @llvm.maxnum.f32(float %a, float %b)
  ret float %1
}
436
437declare float @llvm.fma.f32(float, float, float)
438
define float @fmadd_s(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fmadd_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmadd.s fa0, fa0, fa1, fa2
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmadd_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmadd.s fa0, fa0, fa1, fa2
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmadd_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call fmaf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmadd_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call fmaf@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; llvm.fma.f32 selects the fused fmadd.s with +f; soft-float calls libm fmaf.
  %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
  ret float %1
}
470
define float @fmsub_s(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fmsub_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, zero
; RV32IF-NEXT:    fadd.s ft0, fa2, ft0
; RV32IF-NEXT:    fmsub.s fa0, fa0, fa1, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmsub_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, zero
; RV64IF-NEXT:    fadd.s ft0, fa2, ft0
; RV64IF-NEXT:    fmsub.s fa0, fa0, fa1, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmsub_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a1
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    xor a2, a0, a1
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    mv a1, s0
; RV32I-NEXT:    call fmaf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmsub_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a1
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    xor a2, a0, a1
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    mv a1, s0
; RV64I-NEXT:    call fmaf@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
; fma(a, b, -c) folds the negated addend into fmsub.s with +f; soft-float
; negates c by XORing its sign bit and then calls fmaf.
  %c_ = fadd float 0.0, %c ; avoid negation using xor
  %negc = fsub float -0.0, %c_
  %1 = call float @llvm.fma.f32(float %a, float %b, float %negc)
  ret float %1
}
534
define float @fnmadd_s(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fnmadd_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, zero
; RV32IF-NEXT:    fadd.s ft1, fa0, ft0
; RV32IF-NEXT:    fadd.s ft0, fa2, ft0
; RV32IF-NEXT:    fnmadd.s fa0, ft1, fa1, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fnmadd_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, zero
; RV64IF-NEXT:    fadd.s ft1, fa0, ft0
; RV64IF-NEXT:    fadd.s ft0, fa2, ft0
; RV64IF-NEXT:    fnmadd.s fa0, ft1, fa1, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fnmadd_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s2, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lui a2, 524288
; RV32I-NEXT:    xor a1, s1, a2
; RV32I-NEXT:    xor a2, a0, a2
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:    mv a1, s2
; RV32I-NEXT:    call fmaf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmadd_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    mv s2, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    lui a2, 524288
; RV64I-NEXT:    xor a1, s1, a2
; RV64I-NEXT:    xor a2, a0, a2
; RV64I-NEXT:    mv a0, a1
; RV64I-NEXT:    mv a1, s2
; RV64I-NEXT:    call fmaf@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
; fma(-a, b, -c) folds both negations into a single fnmadd.s with +f;
; soft-float XORs the sign bits of a and c before calling fmaf.
  %a_ = fadd float 0.0, %a
  %c_ = fadd float 0.0, %c
  %nega = fsub float -0.0, %a_
  %negc = fsub float -0.0, %c_
  %1 = call float @llvm.fma.f32(float %nega, float %b, float %negc)
  ret float %1
}
614
define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fnmadd_s_2:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, zero
; RV32IF-NEXT:    fadd.s ft1, fa1, ft0
; RV32IF-NEXT:    fadd.s ft0, fa2, ft0
; RV32IF-NEXT:    fnmadd.s fa0, ft1, fa0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fnmadd_s_2:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, zero
; RV64IF-NEXT:    fadd.s ft1, fa1, ft0
; RV64IF-NEXT:    fadd.s ft0, fa2, ft0
; RV64IF-NEXT:    fnmadd.s fa0, ft1, fa0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fnmadd_s_2:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s2, a0
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lui a2, 524288
; RV32I-NEXT:    xor a1, s1, a2
; RV32I-NEXT:    xor a2, a0, a2
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call fmaf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmadd_s_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    mv s2, a0
; RV64I-NEXT:    mv a0, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    lui a2, 524288
; RV64I-NEXT:    xor a1, s1, a2
; RV64I-NEXT:    xor a2, a0, a2
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call fmaf@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
; Same as fnmadd_s but with the negation on the second multiplicand:
; fma(a, -b, -c) still folds to fnmadd.s (operands commuted) with +f.
  %b_ = fadd float 0.0, %b
  %c_ = fadd float 0.0, %c
  %negb = fsub float -0.0, %b_
  %negc = fsub float -0.0, %c_
  %1 = call float @llvm.fma.f32(float %a, float %negb, float %negc)
  ret float %1
}
694
define float @fnmsub_s(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fnmsub_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, zero
; RV32IF-NEXT:    fadd.s ft0, fa0, ft0
; RV32IF-NEXT:    fnmsub.s fa0, ft0, fa1, fa2
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fnmsub_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, zero
; RV64IF-NEXT:    fadd.s ft0, fa0, ft0
; RV64IF-NEXT:    fnmsub.s fa0, ft0, fa1, fa2
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fnmsub_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    xor a0, a0, a1
; RV32I-NEXT:    mv a1, s1
; RV32I-NEXT:    mv a2, s0
; RV32I-NEXT:    call fmaf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmsub_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    xor a0, a0, a1
; RV64I-NEXT:    mv a1, s1
; RV64I-NEXT:    mv a2, s0
; RV64I-NEXT:    call fmaf@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
; fma(-a, b, c) folds the negated multiplicand into fnmsub.s with +f;
; soft-float XORs a's sign bit and calls fmaf.
  %a_ = fadd float 0.0, %a
  %nega = fsub float -0.0, %a_
  %1 = call float @llvm.fma.f32(float %nega, float %b, float %c)
  ret float %1
}
756
define float @fnmsub_s_2(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fnmsub_s_2:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, zero
; RV32IF-NEXT:    fadd.s ft0, fa1, ft0
; RV32IF-NEXT:    fnmsub.s fa0, ft0, fa0, fa2
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fnmsub_s_2:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, zero
; RV64IF-NEXT:    fadd.s ft0, fa1, ft0
; RV64IF-NEXT:    fnmsub.s fa0, ft0, fa0, fa2
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fnmsub_s_2:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    xor a1, a0, a1
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    mv a2, s0
; RV32I-NEXT:    call fmaf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmsub_s_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    xor a1, a0, a1
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    mv a2, s0
; RV64I-NEXT:    call fmaf@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
; Same as fnmsub_s but the negation is on the second multiplicand:
; fma(a, -b, c) still folds to fnmsub.s (operands commuted) with +f.
  %b_ = fadd float 0.0, %b
  %negb = fsub float -0.0, %b_
  %1 = call float @llvm.fma.f32(float %a, float %negb, float %c)
  ret float %1
}
820
define float @fmadd_s_contract(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fmadd_s_contract:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmadd.s fa0, fa0, fa1, fa2
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmadd_s_contract:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmadd.s fa0, fa0, fa1, fa2
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmadd_s_contract:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    call __mulsf3@plt
; RV32I-NEXT:    mv a1, s0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmadd_s_contract:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    call __mulsf3@plt
; RV64I-NEXT:    mv a1, s0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
; The 'contract' fast-math flag permits fusing fmul+fadd into fmadd.s with
; +f; soft-float keeps the separate __mulsf3 then __addsf3 libcalls.
  %1 = fmul contract float %a, %b
  %2 = fadd contract float %1, %c
  ret float %2
}
863
define float @fmsub_s_contract(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fmsub_s_contract:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, zero
; RV32IF-NEXT:    fadd.s ft0, fa2, ft0
; RV32IF-NEXT:    fmsub.s fa0, fa0, fa1, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmsub_s_contract:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, zero
; RV64IF-NEXT:    fadd.s ft0, fa2, ft0
; RV64IF-NEXT:    fmsub.s fa0, fa0, fa1, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmsub_s_contract:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s2, a1
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    mv a1, s2
; RV32I-NEXT:    call __mulsf3@plt
; RV32I-NEXT:    mv a1, s0
; RV32I-NEXT:    call __subsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmsub_s_contract:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s2, a1
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    mv a1, s2
; RV64I-NEXT:    call __mulsf3@plt
; RV64I-NEXT:    mv a1, s0
; RV64I-NEXT:    call __subsf3@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
; 'contract' allows fusing fmul+fsub into fmsub.s with +f; soft-float emits
; the separate __mulsf3 then __subsf3 libcalls.
  %c_ = fadd float 0.0, %c ; avoid negation using xor
  %1 = fmul contract float %a, %b
  %2 = fsub contract float %1, %c_
  ret float %2
}
933
define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fnmadd_s_contract:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, zero
; RV32IF-NEXT:    fadd.s ft1, fa0, ft0
; RV32IF-NEXT:    fadd.s ft2, fa1, ft0
; RV32IF-NEXT:    fadd.s ft0, fa2, ft0
; RV32IF-NEXT:    fnmadd.s fa0, ft1, ft2, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fnmadd_s_contract:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, zero
; RV64IF-NEXT:    fadd.s ft1, fa0, ft0
; RV64IF-NEXT:    fadd.s ft2, fa1, ft0
; RV64IF-NEXT:    fadd.s ft0, fa2, ft0
; RV64IF-NEXT:    fnmadd.s fa0, ft1, ft2, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fnmadd_s_contract:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s2, a2
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s3, a0
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    mv a0, s3
; RV32I-NEXT:    mv a1, s1
; RV32I-NEXT:    call __mulsf3@plt
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    xor a0, a0, a1
; RV32I-NEXT:    mv a1, s0
; RV32I-NEXT:    call __subsf3@plt
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmadd_s_contract:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s2, a2
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    mv a0, s3
; RV64I-NEXT:    mv a1, s1
; RV64I-NEXT:    call __mulsf3@plt
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    xor a0, a0, a1
; RV64I-NEXT:    mv a1, s0
; RV64I-NEXT:    call __subsf3@plt
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    ret
  ; Each operand is passed through "fadd 0.0, x" so the inputs reach the
  ; multiply/subtract as plain values rather than pre-negated ones.
  %a_ = fadd float 0.0, %a ; avoid negation using xor
  %b_ = fadd float 0.0, %b ; avoid negation using xor
  %c_ = fadd float 0.0, %c ; avoid negation using xor
  ; Computes -(a_ * b_) - c_. With the `contract` flags this should select
  ; a single fnmadd.s under +f (see the RV32IF/RV64IF checks above); the
  ; soft-float RV32I/RV64I lowering instead negates the __mulsf3 result by
  ; xor-ing the sign bit (lui 524288 == 0x80000000) before __subsf3.
  %1 = fmul contract float %a_, %b_
  %2 = fneg float %1
  %3 = fsub contract float %2, %c_
  ret float %3
}
1032
define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fnmsub_s_contract:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, zero
; RV32IF-NEXT:    fadd.s ft1, fa0, ft0
; RV32IF-NEXT:    fadd.s ft0, fa1, ft0
; RV32IF-NEXT:    fnmsub.s fa0, ft1, ft0, fa2
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fnmsub_s_contract:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, zero
; RV64IF-NEXT:    fadd.s ft1, fa0, ft0
; RV64IF-NEXT:    fadd.s ft0, fa1, ft0
; RV64IF-NEXT:    fnmsub.s fa0, ft1, ft0, fa2
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fnmsub_s_contract:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s2, a2
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv a1, a0
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __mulsf3@plt
; RV32I-NEXT:    mv a1, a0
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __subsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmsub_s_contract:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s2, a2
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv a1, a0
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __mulsf3@plt
; RV64I-NEXT:    mv a1, a0
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __subsf3@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
  ; The two multiply operands pass through "fadd 0.0, x" so they reach the
  ; fmul as plain values rather than pre-negated ones; %c is used directly.
  %a_ = fadd float 0.0, %a ; avoid negation using xor
  %b_ = fadd float 0.0, %b ; avoid negation using xor
  ; Computes c - (a_ * b_). With the `contract` flags this should select a
  ; single fnmsub.s under +f (see the RV32IF/RV64IF checks above); the
  ; soft-float RV32I/RV64I lowering keeps separate __mulsf3 and __subsf3
  ; libcalls with the product moved into the subtrahend register.
  %1 = fmul contract float %a_, %b_
  %2 = fsub contract float %c, %1
  ret float %2
}
1111