; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=riscv32 -mattr=+f -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV32IF %s
; RUN: llc -mtriple=riscv64 -mattr=+f -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV64IF %s
; RUN: llc -mtriple=riscv32 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV32I %s
; RUN: llc -mtriple=riscv64 -verify-machineinstrs < %s \
; RUN:   | FileCheck -check-prefix=RV64I %s

; These tests are each targeted at a particular RISC-V FPU instruction. Most
; other files in this folder exercise LLVM IR instructions that don't directly
; match a RISC-V instruction.

define float @fadd_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fadd_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    fadd.s ft0, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fadd_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    fadd.s ft0, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fadd_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fadd_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = fadd float %a, %b
  ret float %1
}
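
; Note: in the RV32I/RV64I configurations there are no FP registers, so the
; float operands arrive in integer argument registers and each operation
; lowers to the corresponding soft-float libcall (__addsf3 and friends); the
; surrounding addi/sw/lw is just the call frame for that libcall.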

define float @fsub_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fsub_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    fsub.s ft0, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fsub_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    fsub.s ft0, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fsub_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __subsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fsub_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call __subsf3@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = fsub float %a, %b
  ret float %1
}

define float @fmul_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fmul_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    fmul.s ft0, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmul_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    fmul.s ft0, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmul_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __mulsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmul_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call __mulsf3@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = fmul float %a, %b
  ret float %1
}

define float @fdiv_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fdiv_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    fdiv.s ft0, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fdiv_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    fdiv.s ft0, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fdiv_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __divsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fdiv_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call __divsf3@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = fdiv float %a, %b
  ret float %1
}

declare float @llvm.sqrt.f32(float)

define float @fsqrt_s(float %a) nounwind {
; RV32IF-LABEL: fsqrt_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a0
; RV32IF-NEXT:    fsqrt.s ft0, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fsqrt_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a0
; RV64IF-NEXT:    fsqrt.s ft0, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fsqrt_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call sqrtf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fsqrt_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call sqrtf@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = call float @llvm.sqrt.f32(float %a)
  ret float %1
}

declare float @llvm.copysign.f32(float, float)

define float @fsgnj_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fsgnj_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    fsgnj.s ft0, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fsgnj_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    fsgnj.s ft0, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fsgnj_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    lui a2, 524288
; RV32I-NEXT:    and a1, a1, a2
; RV32I-NEXT:    addi a2, a2, -1
; RV32I-NEXT:    and a0, a0, a2
; RV32I-NEXT:    or a0, a0, a1
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fsgnj_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    lui a2, 524288
; RV64I-NEXT:    and a1, a1, a2
; RV64I-NEXT:    addiw a2, a2, -1
; RV64I-NEXT:    and a0, a0, a2
; RV64I-NEXT:    or a0, a0, a1
; RV64I-NEXT:    ret
  %1 = call float @llvm.copysign.f32(float %a, float %b)
  ret float %1
}
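
; Note: in the soft-float expansion of copysign above, lui a2, 524288 builds
; 0x80000000 (the f32 sign bit) and addi/addiw a2, a2, -1 gives 0x7fffffff,
; so the result is assembled as (a & 0x7fffffff) | (b & 0x80000000).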

; This function performs extra work to ensure that
; DAGCombiner::visitBITCAST doesn't replace the fneg with an xor.
define i32 @fneg_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fneg_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a0
; RV32IF-NEXT:    fadd.s ft0, ft0, ft0
; RV32IF-NEXT:    fneg.s ft1, ft0
; RV32IF-NEXT:    feq.s a0, ft0, ft1
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fneg_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a0
; RV64IF-NEXT:    fadd.s ft0, ft0, ft0
; RV64IF-NEXT:    fneg.s ft1, ft0
; RV64IF-NEXT:    feq.s a0, ft0, ft1
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fneg_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv a1, a0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    xor a1, a0, a1
; RV32I-NEXT:    call __eqsf2@plt
; RV32I-NEXT:    seqz a0, a0
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fneg_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv a1, a0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    xor a1, a0, a1
; RV64I-NEXT:    call __eqsf2@plt
; RV64I-NEXT:    seqz a0, a0
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = fadd float %a, %a
  %2 = fneg float %1
  %3 = fcmp oeq float %1, %2
  %4 = zext i1 %3 to i32
  ret i32 %4
}
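
; Note: the seemingly redundant fadd above keeps the negated value as the
; result of a floating-point computation, so the combine that would rewrite
; (bitcast (fneg x)) into an integer xor cannot fire and the fneg survives
; to instruction selection, becoming fneg.s (or an explicit xor of the sign
; bit in the soft-float configurations).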

; This function performs extra work to ensure that
; DAGCombiner::visitBITCAST doesn't replace the fneg with an xor.
define float @fsgnjn_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fsgnjn_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    fadd.s ft0, ft1, ft0
; RV32IF-NEXT:    fsgnjn.s ft0, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fsgnjn_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    fadd.s ft0, ft1, ft0
; RV64IF-NEXT:    fsgnjn.s ft0, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fsgnjn_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    not a0, a0
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    addi a2, a1, -1
; RV32I-NEXT:    and a2, s0, a2
; RV32I-NEXT:    and a0, a0, a1
; RV32I-NEXT:    or a0, a2, a0
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fsgnjn_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    not a0, a0
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    addiw a2, a1, -1
; RV64I-NEXT:    and a2, s0, a2
; RV64I-NEXT:    and a0, a0, a1
; RV64I-NEXT:    or a0, a2, a0
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = fadd float %a, %b
  %2 = fneg float %1
  %3 = call float @llvm.copysign.f32(float %a, float %2)
  ret float %3
}
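
; Note: fsgnjn.s rd, rs1, rs2 takes the magnitude of rs1 and the inverted
; sign of rs2, so copysign(a, -x) selects to a single sign-injection
; instruction with no separate negation.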

declare float @llvm.fabs.f32(float)

; This function performs extra work to ensure that
; DAGCombiner::visitBITCAST doesn't replace the fabs with an and.
define float @fabs_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fabs_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    fadd.s ft0, ft1, ft0
; RV32IF-NEXT:    fabs.s ft1, ft0
; RV32IF-NEXT:    fadd.s ft0, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fabs_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    fadd.s ft0, ft1, ft0
; RV64IF-NEXT:    fabs.s ft1, ft0
; RV64IF-NEXT:    fadd.s ft0, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fabs_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv a1, a0
; RV32I-NEXT:    lui a0, 524288
; RV32I-NEXT:    addi a0, a0, -1
; RV32I-NEXT:    and a0, a1, a0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fabs_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv a1, a0
; RV64I-NEXT:    lui a0, 524288
; RV64I-NEXT:    addiw a0, a0, -1
; RV64I-NEXT:    and a0, a1, a0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = fadd float %a, %b
  %2 = call float @llvm.fabs.f32(float %1)
  %3 = fadd float %2, %1
  ret float %3
}

declare float @llvm.minnum.f32(float, float)

define float @fmin_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fmin_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    fmin.s ft0, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmin_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    fmin.s ft0, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmin_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call fminf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmin_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call fminf@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = call float @llvm.minnum.f32(float %a, float %b)
  ret float %1
}
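
; Note: llvm.minnum/llvm.maxnum ask for IEEE minNum/maxNum behaviour (return
; the other operand when exactly one input is a quiet NaN), which fmin.s
; (and fmax.s below) provide, so the intrinsics select directly to the
; F-extension instructions; the soft-float runs call fminf/fmaxf instead.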

declare float @llvm.maxnum.f32(float, float)

define float @fmax_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fmax_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    fmax.s ft0, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmax_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    fmax.s ft0, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmax_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call fmaxf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmax_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call fmaxf@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = call float @llvm.maxnum.f32(float %a, float %b)
  ret float %1
}

define i32 @feq_s(float %a, float %b) nounwind {
; RV32IF-LABEL: feq_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    feq.s a0, ft1, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: feq_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    feq.s a0, ft1, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: feq_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __eqsf2@plt
; RV32I-NEXT:    seqz a0, a0
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: feq_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call __eqsf2@plt
; RV64I-NEXT:    seqz a0, a0
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = fcmp oeq float %a, %b
  %2 = zext i1 %1 to i32
  ret i32 %2
}

define i32 @flt_s(float %a, float %b) nounwind {
; RV32IF-LABEL: flt_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    flt.s a0, ft1, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: flt_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    flt.s a0, ft1, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: flt_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __ltsf2@plt
; RV32I-NEXT:    slti a0, a0, 0
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: flt_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call __ltsf2@plt
; RV64I-NEXT:    slti a0, a0, 0
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = fcmp olt float %a, %b
  %2 = zext i1 %1 to i32
  ret i32 %2
}

define i32 @fle_s(float %a, float %b) nounwind {
; RV32IF-LABEL: fle_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    fle.s a0, ft1, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fle_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    fle.s a0, ft1, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fle_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call __lesf2@plt
; RV32I-NEXT:    slti a0, a0, 1
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fle_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call __lesf2@plt
; RV64I-NEXT:    slti a0, a0, 1
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = fcmp ole float %a, %b
  %2 = zext i1 %1 to i32
  ret i32 %2
}
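
; Note: the soft-float comparisons return an int with libgcc semantics:
; __eqsf2 is 0 iff the operands compare equal, __ltsf2 is negative iff
; a < b, and __lesf2 is <= 0 iff a <= b. Hence seqz (== 0), slti ... 0
; (< 0) and slti ... 1 (<= 0) materialize the i1 results above.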

declare float @llvm.fma.f32(float, float, float)

define float @fmadd_s(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fmadd_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a2
; RV32IF-NEXT:    fmv.w.x ft1, a1
; RV32IF-NEXT:    fmv.w.x ft2, a0
; RV32IF-NEXT:    fmadd.s ft0, ft2, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmadd_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a2
; RV64IF-NEXT:    fmv.w.x ft1, a1
; RV64IF-NEXT:    fmv.w.x ft2, a0
; RV64IF-NEXT:    fmadd.s ft0, ft2, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmadd_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    call fmaf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmadd_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    call fmaf@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = call float @llvm.fma.f32(float %a, float %b, float %c)
  ret float %1
}

define float @fmsub_s(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fmsub_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    fmv.w.x ft2, a2
; RV32IF-NEXT:    fmv.w.x ft3, zero
; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
; RV32IF-NEXT:    fmsub.s ft0, ft1, ft0, ft2
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmsub_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    fmv.w.x ft2, a2
; RV64IF-NEXT:    fmv.w.x ft3, zero
; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
; RV64IF-NEXT:    fmsub.s ft0, ft1, ft0, ft2
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmsub_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a1
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    xor a2, a0, a1
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    mv a1, s0
; RV32I-NEXT:    call fmaf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmsub_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a1
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    xor a2, a0, a1
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    mv a1, s0
; RV64I-NEXT:    call fmaf@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
  %c_ = fadd float 0.0, %c ; avoid negation using xor
  %negc = fsub float -0.0, %c_
  %1 = call float @llvm.fma.f32(float %a, float %b, float %negc)
  ret float %1
}
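
; Note: fmsub.s rd, rs1, rs2, rs3 computes (rs1 x rs2) - rs3, so
; fma(a, b, -c) folds into one instruction. The fadd of c with +0.0 (via
; fmv.w.x ft3, zero) is the "avoid negation using xor" trick from the IR:
; it keeps %negc an opaque FP value so the negation reaches isel intact.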

define float @fnmadd_s(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fnmadd_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a2
; RV32IF-NEXT:    fmv.w.x ft2, a0
; RV32IF-NEXT:    fmv.w.x ft3, zero
; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
; RV32IF-NEXT:    fadd.s ft1, ft1, ft3
; RV32IF-NEXT:    fnmadd.s ft0, ft2, ft0, ft1
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fnmadd_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a2
; RV64IF-NEXT:    fmv.w.x ft2, a0
; RV64IF-NEXT:    fmv.w.x ft3, zero
; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
; RV64IF-NEXT:    fadd.s ft1, ft1, ft3
; RV64IF-NEXT:    fnmadd.s ft0, ft2, ft0, ft1
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fnmadd_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s2, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lui a2, 524288
; RV32I-NEXT:    xor a1, s1, a2
; RV32I-NEXT:    xor a2, a0, a2
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:    mv a1, s2
; RV32I-NEXT:    call fmaf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmadd_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    mv s2, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    lui a2, 524288
; RV64I-NEXT:    xor a1, s1, a2
; RV64I-NEXT:    xor a2, a0, a2
; RV64I-NEXT:    mv a0, a1
; RV64I-NEXT:    mv a1, s2
; RV64I-NEXT:    call fmaf@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
  %a_ = fadd float 0.0, %a
  %c_ = fadd float 0.0, %c
  %nega = fsub float -0.0, %a_
  %negc = fsub float -0.0, %c_
  %1 = call float @llvm.fma.f32(float %nega, float %b, float %negc)
  ret float %1
}
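
; Note: fnmadd.s rd, rs1, rs2, rs3 computes -(rs1 x rs2) - rs3, matching
; fma(-a, b, -c); without FP registers the same negations are done by
; xor-ing the sign bit (lui 524288) before calling fmaf.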

define float @fnmadd_s_2(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fnmadd_s_2:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a0
; RV32IF-NEXT:    fmv.w.x ft1, a2
; RV32IF-NEXT:    fmv.w.x ft2, a1
; RV32IF-NEXT:    fmv.w.x ft3, zero
; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
; RV32IF-NEXT:    fadd.s ft1, ft1, ft3
; RV32IF-NEXT:    fnmadd.s ft0, ft2, ft0, ft1
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fnmadd_s_2:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a0
; RV64IF-NEXT:    fmv.w.x ft1, a2
; RV64IF-NEXT:    fmv.w.x ft2, a1
; RV64IF-NEXT:    fmv.w.x ft3, zero
; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
; RV64IF-NEXT:    fadd.s ft1, ft1, ft3
; RV64IF-NEXT:    fnmadd.s ft0, ft2, ft0, ft1
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fnmadd_s_2:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s2, a0
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lui a2, 524288
; RV32I-NEXT:    xor a1, s1, a2
; RV32I-NEXT:    xor a2, a0, a2
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call fmaf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmadd_s_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    mv s2, a0
; RV64I-NEXT:    mv a0, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    lui a2, 524288
; RV64I-NEXT:    xor a1, s1, a2
; RV64I-NEXT:    xor a2, a0, a2
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call fmaf@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
  %b_ = fadd float 0.0, %b
  %c_ = fadd float 0.0, %c
  %negb = fsub float -0.0, %b_
  %negc = fsub float -0.0, %c_
  %1 = call float @llvm.fma.f32(float %a, float %negb, float %negc)
  ret float %1
}

define float @fnmsub_s(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fnmsub_s:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a2
; RV32IF-NEXT:    fmv.w.x ft1, a1
; RV32IF-NEXT:    fmv.w.x ft2, a0
; RV32IF-NEXT:    fmv.w.x ft3, zero
; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
; RV32IF-NEXT:    fnmsub.s ft0, ft2, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fnmsub_s:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a2
; RV64IF-NEXT:    fmv.w.x ft1, a1
; RV64IF-NEXT:    fmv.w.x ft2, a0
; RV64IF-NEXT:    fmv.w.x ft3, zero
; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
; RV64IF-NEXT:    fnmsub.s ft0, ft2, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fnmsub_s:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    xor a0, a0, a1
; RV32I-NEXT:    mv a1, s1
; RV32I-NEXT:    mv a2, s0
; RV32I-NEXT:    call fmaf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmsub_s:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    xor a0, a0, a1
; RV64I-NEXT:    mv a1, s1
; RV64I-NEXT:    mv a2, s0
; RV64I-NEXT:    call fmaf@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
  %a_ = fadd float 0.0, %a
  %nega = fsub float -0.0, %a_
  %1 = call float @llvm.fma.f32(float %nega, float %b, float %c)
  ret float %1
}
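
; Note: fnmsub.s rd, rs1, rs2, rs3 computes -(rs1 x rs2) + rs3, so it
; covers fma with exactly one negated multiplicand (%nega here, %negb in
; the _2 variant below).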

define float @fnmsub_s_2(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fnmsub_s_2:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a2
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    fmv.w.x ft2, a1
; RV32IF-NEXT:    fmv.w.x ft3, zero
; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
; RV32IF-NEXT:    fnmsub.s ft0, ft2, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fnmsub_s_2:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a2
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    fmv.w.x ft2, a1
; RV64IF-NEXT:    fmv.w.x ft3, zero
; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
; RV64IF-NEXT:    fnmsub.s ft0, ft2, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fnmsub_s_2:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    xor a1, a0, a1
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    mv a2, s0
; RV32I-NEXT:    call fmaf@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmsub_s_2:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    xor a1, a0, a1
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    mv a2, s0
; RV64I-NEXT:    call fmaf@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
  %b_ = fadd float 0.0, %b
  %negb = fsub float -0.0, %b_
  %1 = call float @llvm.fma.f32(float %a, float %negb, float %c)
  ret float %1
}

define float @fmadd_s_contract(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fmadd_s_contract:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a2
; RV32IF-NEXT:    fmv.w.x ft1, a1
; RV32IF-NEXT:    fmv.w.x ft2, a0
; RV32IF-NEXT:    fmadd.s ft0, ft2, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmadd_s_contract:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a2
; RV64IF-NEXT:    fmv.w.x ft1, a1
; RV64IF-NEXT:    fmv.w.x ft2, a0
; RV64IF-NEXT:    fmadd.s ft0, ft2, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmadd_s_contract:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s0, a2
; RV32I-NEXT:    call __mulsf3@plt
; RV32I-NEXT:    mv a1, s0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmadd_s_contract:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -16
; RV64I-NEXT:    sd ra, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s0, a2
; RV64I-NEXT:    call __mulsf3@plt
; RV64I-NEXT:    mv a1, s0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    ld ra, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 16
; RV64I-NEXT:    ret
  %1 = fmul contract float %a, %b
  %2 = fadd contract float %1, %c
  ret float %2
}
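
; Note: the 'contract' fast-math flag is what licenses fusing the separate
; fmul and fadd into fmadd.s here; the plain fmul/fadd tests above must not
; fuse. The soft-float configurations simply emit the two libcalls, as the
; checks show.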

define float @fmsub_s_contract(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fmsub_s_contract:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a1
; RV32IF-NEXT:    fmv.w.x ft1, a0
; RV32IF-NEXT:    fmv.w.x ft2, a2
; RV32IF-NEXT:    fmv.w.x ft3, zero
; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
; RV32IF-NEXT:    fmsub.s ft0, ft1, ft0, ft2
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fmsub_s_contract:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a1
; RV64IF-NEXT:    fmv.w.x ft1, a0
; RV64IF-NEXT:    fmv.w.x ft2, a2
; RV64IF-NEXT:    fmv.w.x ft3, zero
; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
; RV64IF-NEXT:    fmsub.s ft0, ft1, ft0, ft2
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fmsub_s_contract:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s2, a1
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, a2
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    mv a1, s2
; RV32I-NEXT:    call __mulsf3@plt
; RV32I-NEXT:    mv a1, s0
; RV32I-NEXT:    call __subsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fmsub_s_contract:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s2, a1
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, a2
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    mv a1, s2
; RV64I-NEXT:    call __mulsf3@plt
; RV64I-NEXT:    mv a1, s0
; RV64I-NEXT:    call __subsf3@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
  %c_ = fadd float 0.0, %c ; avoid negation using xor
  %1 = fmul contract float %a, %b
  %2 = fsub contract float %1, %c_
  ret float %2
}

define float @fnmadd_s_contract(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fnmadd_s_contract:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a2
; RV32IF-NEXT:    fmv.w.x ft1, a1
; RV32IF-NEXT:    fmv.w.x ft2, a0
; RV32IF-NEXT:    fmv.w.x ft3, zero
; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
; RV32IF-NEXT:    fadd.s ft1, ft1, ft3
; RV32IF-NEXT:    fadd.s ft0, ft0, ft3
; RV32IF-NEXT:    fnmadd.s ft0, ft2, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fnmadd_s_contract:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a2
; RV64IF-NEXT:    fmv.w.x ft1, a1
; RV64IF-NEXT:    fmv.w.x ft2, a0
; RV64IF-NEXT:    fmv.w.x ft3, zero
; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
; RV64IF-NEXT:    fadd.s ft1, ft1, ft3
; RV64IF-NEXT:    fadd.s ft0, ft0, ft3
; RV64IF-NEXT:    fnmadd.s ft0, ft2, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fnmadd_s_contract:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -32
; RV32I-NEXT:    sw ra, 28(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 24(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 20(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 16(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s3, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s2, a2
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s3, a0
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s1, a0
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    mv a0, s3
; RV32I-NEXT:    mv a1, s1
; RV32I-NEXT:    call __mulsf3@plt
; RV32I-NEXT:    lui a1, 524288
; RV32I-NEXT:    xor a0, a0, a1
; RV32I-NEXT:    mv a1, s0
; RV32I-NEXT:    call __subsf3@plt
; RV32I-NEXT:    lw ra, 28(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 24(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 20(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 16(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s3, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 32
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmadd_s_contract:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -48
; RV64I-NEXT:    sd ra, 40(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 32(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s3, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s2, a2
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s3, a0
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s1, a0
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    mv a0, s3
; RV64I-NEXT:    mv a1, s1
; RV64I-NEXT:    call __mulsf3@plt
; RV64I-NEXT:    lui a1, 524288
; RV64I-NEXT:    xor a0, a0, a1
; RV64I-NEXT:    mv a1, s0
; RV64I-NEXT:    call __subsf3@plt
; RV64I-NEXT:    ld ra, 40(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 32(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s3, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 48
; RV64I-NEXT:    ret
  %a_ = fadd float 0.0, %a ; avoid negation using xor
  %b_ = fadd float 0.0, %b ; avoid negation using xor
  %c_ = fadd float 0.0, %c ; avoid negation using xor
  %1 = fmul contract float %a_, %b_
  %2 = fneg float %1
  %3 = fsub contract float %2, %c_
  ret float %3
}
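
; Note: with contract on both the fmul and the fsub, the fneg of the product
; plus the subtraction of c folds into fnmadd.s; all three inputs are first
; laundered through fadd +0.0 so no operand is a bare argument that the
; combiner could negate with an integer xor.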

define float @fnmsub_s_contract(float %a, float %b, float %c) nounwind {
; RV32IF-LABEL: fnmsub_s_contract:
; RV32IF:       # %bb.0:
; RV32IF-NEXT:    fmv.w.x ft0, a2
; RV32IF-NEXT:    fmv.w.x ft1, a1
; RV32IF-NEXT:    fmv.w.x ft2, a0
; RV32IF-NEXT:    fmv.w.x ft3, zero
; RV32IF-NEXT:    fadd.s ft2, ft2, ft3
; RV32IF-NEXT:    fadd.s ft1, ft1, ft3
; RV32IF-NEXT:    fnmsub.s ft0, ft2, ft1, ft0
; RV32IF-NEXT:    fmv.x.w a0, ft0
; RV32IF-NEXT:    ret
;
; RV64IF-LABEL: fnmsub_s_contract:
; RV64IF:       # %bb.0:
; RV64IF-NEXT:    fmv.w.x ft0, a2
; RV64IF-NEXT:    fmv.w.x ft1, a1
; RV64IF-NEXT:    fmv.w.x ft2, a0
; RV64IF-NEXT:    fmv.w.x ft3, zero
; RV64IF-NEXT:    fadd.s ft2, ft2, ft3
; RV64IF-NEXT:    fadd.s ft1, ft1, ft3
; RV64IF-NEXT:    fnmsub.s ft0, ft2, ft1, ft0
; RV64IF-NEXT:    fmv.x.w a0, ft0
; RV64IF-NEXT:    ret
;
; RV32I-LABEL: fnmsub_s_contract:
; RV32I:       # %bb.0:
; RV32I-NEXT:    addi sp, sp, -16
; RV32I-NEXT:    sw ra, 12(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s0, 8(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s1, 4(sp) # 4-byte Folded Spill
; RV32I-NEXT:    sw s2, 0(sp) # 4-byte Folded Spill
; RV32I-NEXT:    mv s2, a2
; RV32I-NEXT:    mv s1, a1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv s0, a0
; RV32I-NEXT:    mv a0, s1
; RV32I-NEXT:    li a1, 0
; RV32I-NEXT:    call __addsf3@plt
; RV32I-NEXT:    mv a1, a0
; RV32I-NEXT:    mv a0, s0
; RV32I-NEXT:    call __mulsf3@plt
; RV32I-NEXT:    mv a1, a0
; RV32I-NEXT:    mv a0, s2
; RV32I-NEXT:    call __subsf3@plt
; RV32I-NEXT:    lw ra, 12(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s0, 8(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s1, 4(sp) # 4-byte Folded Reload
; RV32I-NEXT:    lw s2, 0(sp) # 4-byte Folded Reload
; RV32I-NEXT:    addi sp, sp, 16
; RV32I-NEXT:    ret
;
; RV64I-LABEL: fnmsub_s_contract:
; RV64I:       # %bb.0:
; RV64I-NEXT:    addi sp, sp, -32
; RV64I-NEXT:    sd ra, 24(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s0, 16(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s1, 8(sp) # 8-byte Folded Spill
; RV64I-NEXT:    sd s2, 0(sp) # 8-byte Folded Spill
; RV64I-NEXT:    mv s2, a2
; RV64I-NEXT:    mv s1, a1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv s0, a0
; RV64I-NEXT:    mv a0, s1
; RV64I-NEXT:    li a1, 0
; RV64I-NEXT:    call __addsf3@plt
; RV64I-NEXT:    mv a1, a0
; RV64I-NEXT:    mv a0, s0
; RV64I-NEXT:    call __mulsf3@plt
; RV64I-NEXT:    mv a1, a0
; RV64I-NEXT:    mv a0, s2
; RV64I-NEXT:    call __subsf3@plt
; RV64I-NEXT:    ld ra, 24(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s0, 16(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s1, 8(sp) # 8-byte Folded Reload
; RV64I-NEXT:    ld s2, 0(sp) # 8-byte Folded Reload
; RV64I-NEXT:    addi sp, sp, 32
; RV64I-NEXT:    ret
  %a_ = fadd float 0.0, %a ; avoid negation using xor
  %b_ = fadd float 0.0, %b ; avoid negation using xor
  %1 = fmul contract float %a_, %b_
  %2 = fsub contract float %c, %1
  ret float %2
}