; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefixes=FMA,FMA-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefixes=FMA4,FMA4-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefixes=FMA4,FMA4-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast | FileCheck %s --check-prefixes=AVX512,AVX512-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA,FMA-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=FMA4,FMA4-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq,+avx512vl -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefixes=AVX512,AVX512-NOINFS

;
; Every run passes -fp-contract=fast. Checks that hold for both run variants
; of a target use the plain target prefix (FMA, FMA4 or AVX512); the *-INFS
; prefixes belong to the default runs and the *-NOINFS prefixes to the runs
; that add -enable-no-infs-fp-math.
;

;
; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z)
;

define float @test_f32_fmadd(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fadd float %x, %a2
  ret float %res
}

define <4 x float> @test_4f32_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fadd <4 x float> %x, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fadd <8 x float> %x, %a2
  ret <8 x float> %res
}

define double @test_f64_fmadd(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fadd double %x, %a2
  ret double %res
}

define <2 x double> @test_2f64_fmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %res = fadd <2 x double> %x, %a2
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %res = fadd <4 x double> %x, %a2
  ret <4 x double> %res
}

;
; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
;

define float @test_f32_fmsub(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213ss {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fsub float %x, %a2
  ret float %res
}

define <4 x float> @test_4f32_fmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fsub <4 x float> %x, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fsub <8 x float> %x, %a2
  ret <8 x float> %res
}

define double @test_f64_fmsub(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fsub double %x, %a2
  ret double %res
}

define <2 x double> @test_2f64_fmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213pd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %res = fsub <2 x double> %x, %a2
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm1) - ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub213pd {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %res = fsub <4 x double> %x, %a2
  ret <4 x double> %res
}

;
; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
;

define float @test_f32_fnmadd(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddss {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %res = fsub float %a2, %x
  ret float %res
}

define <4 x float> @test_4f32_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %res = fsub <4 x float> %a2, %x
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fnmadd(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddps {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %res = fsub <8 x float> %a2, %x
  ret <8 x float> %res
}

define double @test_f64_fnmadd(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddsd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %res = fsub double %a2, %x
  ret double %res
}

define <2 x double> @test_2f64_fnmadd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddpd {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %res = fsub <2 x double> %a2, %x
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fnmadd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fnmadd:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fnmadd:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fnmadd:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %res = fsub <4 x double> %a2, %x
  ret <4 x double> %res
}

;
; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
;
; The fneg in these tests is written in the IR as (fsub -0.0, x).
;

define float @test_f32_fnmsub(float %a0, float %a1, float %a2) {
; FMA-LABEL: test_f32_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f32_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubss {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f32_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul float %a0, %a1
  %y = fsub float -0.000000e+00, %x
  %res = fsub float %y, %a2
  ret float %res
}

define <4 x float> @test_4f32_fnmsub(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul <4 x float> %a0, %a1
  %y = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <4 x float> %y, %a2
  ret <4 x float> %res
}

define <8 x float> @test_8f32_fnmsub(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; FMA-LABEL: test_8f32_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f32_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubps {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f32_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
; AVX512-NEXT:    retq
  %x = fmul <8 x float> %a0, %a1
  %y = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <8 x float> %y, %a2
  ret <8 x float> %res
}

define double @test_f64_fnmsub(double %a0, double %a1, double %a2) {
; FMA-LABEL: test_f64_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_f64_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_f64_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul double %a0, %a1
  %y = fsub double -0.000000e+00, %x
  %res = fsub double %y, %a2
  ret double %res
}

define <2 x double> @test_2f64_fnmsub(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubpd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
; AVX512-NEXT:    retq
  %x = fmul <2 x double> %a0, %a1
  %y = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %x
  %res = fsub <2 x double> %y, %a2
  ret <2 x double> %res
}

define <4 x double> @test_4f64_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; FMA-LABEL: test_4f64_fnmsub:
; FMA:       # %bb.0:
; FMA-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f64_fnmsub:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f64_fnmsub:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2
; AVX512-NEXT:    retq
  %x = fmul <4 x double> %a0, %a1
  %y = fsub <4 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
  %res = fsub <4 x double> %y, %a2
  ret <4 x double> %res
}

;
; Load Folding Patterns
;

define <4 x float> @test_4f32_fmadd_load(ptr %a0, <4 x float> %a1, <4 x float> %a2) {
; FMA-LABEL: test_4f32_fmadd_load:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_4f32_fmadd_load:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_4f32_fmadd_load:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1
; AVX512-NEXT:    retq
  %x = load <4 x float>, ptr %a0
  %y = fmul <4 x float> %x, %a1
  %res = fadd <4 x float> %y, %a2
  ret <4 x float> %res
}

define <2 x double> @test_2f64_fmsub_load(ptr %a0, <2 x double> %a1, <2 x double> %a2) {
; FMA-LABEL: test_2f64_fmsub_load:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_2f64_fmsub_load:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmsubpd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_2f64_fmsub_load:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmsub132pd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
; AVX512-NEXT:    retq
  %x = load <2 x double>, ptr %a0
  %y = fmul <2 x double> %x, %a1
  %res = fsub <2 x double> %y, %a2
  ret <2 x double> %res
}

;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;
; In the checks below these are folded into a single FMA-family instruction
; only for the runs that add -enable-no-infs-fp-math; the default runs keep a
; separate add/sub followed by a multiply.
;

define <4 x float> @test_v4f32_mul_add_x_one_y(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_add_x_one_y:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_add_x_one_y:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_add_x_one_y:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_one_y:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <4 x float> %a, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_add_x_one(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <4 x float> %y, %a
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_add_x_one_undefs(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_one_undefs:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <4 x float> %x, <float 1.0, float undef, float 1.0, float undef>
  %m = fmul <4 x float> %y, %a
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_add_x_negone_y(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_add_x_negone_y:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_add_x_negone_y:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <4 x float> %a, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_add_x_negone(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <4 x float> %y, %a
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_add_x_negone_undefs(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_add_x_negone_undefs:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <4 x float> %x, <float undef, float -1.0, float undef, float -1.0>
  %m = fmul <4 x float> %y, %a
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_sub_one_x_y(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_sub_one_x_y:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_one_x_y:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <4 x float> %s, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_one_x(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_one_x_undefs(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = <1.0E+0,u,1.0E+0,1.0E+0>
; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = <1.0E+0,u,1.0E+0,1.0E+0>
; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfnmaddps {{.*#+}} xmm0 = -(xmm0 * xmm1) + xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_one_x_undefs:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfnmadd213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <4 x float> <float 1.0, float undef, float 1.0, float 1.0>, %x
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_sub_negone_x_y(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_negone_x_y:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <4 x float> %s, %y
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_negone_x(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA4-INFS:       # %bb.0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; FMA4-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; FMA4-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; AVX512-INFS:       # %bb.0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0]
; AVX512-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
; AVX512-INFS-NEXT:    vmulps %xmm0, %xmm1, %xmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA-NOINFS:       # %bb.0:
; FMA-NOINFS-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; FMA4-NOINFS:       # %bb.0:
; FMA4-NOINFS-NEXT:    vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x:
; AVX512-NOINFS:       # %bb.0:
; AVX512-NOINFS-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <4 x float> <float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <4 x float> %y, %s
  ret <4 x float> %m
}

define <4 x float> @test_v4f32_mul_y_sub_negone_x_undefs(<4 x float> %x, <4 x float> %y) {
; FMA-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs:
; FMA-INFS:       # %bb.0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} xmm2 = <-1.0E+0,-1.0E+0,u,-1.0E+0>
; FMA-INFS-NEXT:    vsubps %xmm0, %xmm2, %xmm0
1001; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1002; FMA-INFS-NEXT: retq 1003; 1004; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs: 1005; FMA4-INFS: # %bb.0: 1006; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm2 = <-1.0E+0,-1.0E+0,u,-1.0E+0> 1007; FMA4-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 1008; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1009; FMA4-INFS-NEXT: retq 1010; 1011; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs: 1012; AVX512-INFS: # %bb.0: 1013; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm2 = [-1.0E+0,-1.0E+0,-1.0E+0,-1.0E+0] 1014; AVX512-INFS-NEXT: vsubps %xmm0, %xmm2, %xmm0 1015; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1016; AVX512-INFS-NEXT: retq 1017; 1018; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs: 1019; FMA-NOINFS: # %bb.0: 1020; FMA-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 1021; FMA-NOINFS-NEXT: retq 1022; 1023; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs: 1024; FMA4-NOINFS: # %bb.0: 1025; FMA4-NOINFS-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm1 1026; FMA4-NOINFS-NEXT: retq 1027; 1028; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_negone_x_undefs: 1029; AVX512-NOINFS: # %bb.0: 1030; AVX512-NOINFS-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm1 1031; AVX512-NOINFS-NEXT: retq 1032 %s = fsub <4 x float> <float -1.0, float -1.0, float undef, float -1.0>, %x 1033 %m = fmul <4 x float> %y, %s 1034 ret <4 x float> %m 1035} 1036 1037define <4 x float> @test_v4f32_mul_sub_x_one_y(<4 x float> %x, <4 x float> %y) { 1038; FMA-INFS-LABEL: test_v4f32_mul_sub_x_one_y: 1039; FMA-INFS: # %bb.0: 1040; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1041; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 1042; FMA-INFS-NEXT: retq 1043; 1044; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_one_y: 1045; FMA4-INFS: # %bb.0: 1046; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1047; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 1048; FMA4-INFS-NEXT: retq 1049; 1050; 
AVX512-INFS-LABEL: test_v4f32_mul_sub_x_one_y: 1051; AVX512-INFS: # %bb.0: 1052; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1053; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 1054; AVX512-INFS-NEXT: retq 1055; 1056; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: 1057; FMA-NOINFS: # %bb.0: 1058; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 1059; FMA-NOINFS-NEXT: retq 1060; 1061; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: 1062; FMA4-NOINFS: # %bb.0: 1063; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 1064; FMA4-NOINFS-NEXT: retq 1065; 1066; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_one_y: 1067; AVX512-NOINFS: # %bb.0: 1068; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 1069; AVX512-NOINFS-NEXT: retq 1070 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 1071 %m = fmul <4 x float> %s, %y 1072 ret <4 x float> %m 1073} 1074 1075define <4 x float> @test_v4f32_mul_y_sub_x_one(<4 x float> %x, <4 x float> %y) { 1076; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one: 1077; FMA-INFS: # %bb.0: 1078; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1079; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1080; FMA-INFS-NEXT: retq 1081; 1082; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one: 1083; FMA4-INFS: # %bb.0: 1084; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1085; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1086; FMA4-INFS-NEXT: retq 1087; 1088; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one: 1089; AVX512-INFS: # %bb.0: 1090; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1091; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1092; AVX512-INFS-NEXT: retq 1093; 1094; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: 1095; FMA-NOINFS: # %bb.0: 1096; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 1097; FMA-NOINFS-NEXT: retq 1098; 1099; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: 1100; 
FMA4-NOINFS: # %bb.0: 1101; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 1102; FMA4-NOINFS-NEXT: retq 1103; 1104; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one: 1105; AVX512-NOINFS: # %bb.0: 1106; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 1107; AVX512-NOINFS-NEXT: retq 1108 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0> 1109 %m = fmul <4 x float> %y, %s 1110 ret <4 x float> %m 1111} 1112 1113define <4 x float> @test_v4f32_mul_y_sub_x_one_undefs(<4 x float> %x, <4 x float> %y) { 1114; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: 1115; FMA-INFS: # %bb.0: 1116; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1117; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1118; FMA-INFS-NEXT: retq 1119; 1120; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: 1121; FMA4-INFS: # %bb.0: 1122; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1123; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1124; FMA4-INFS-NEXT: retq 1125; 1126; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: 1127; AVX512-INFS: # %bb.0: 1128; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1129; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1130; AVX512-INFS-NEXT: retq 1131; 1132; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: 1133; FMA-NOINFS: # %bb.0: 1134; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 1135; FMA-NOINFS-NEXT: retq 1136; 1137; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: 1138; FMA4-NOINFS: # %bb.0: 1139; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm1 1140; FMA4-NOINFS-NEXT: retq 1141; 1142; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_one_undefs: 1143; AVX512-NOINFS: # %bb.0: 1144; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm1 1145; AVX512-NOINFS-NEXT: retq 1146 %s = fsub <4 x float> %x, <float 1.0, float 1.0, float 1.0, float undef> 1147 %m = fmul <4 x float> %y, %s 1148 ret <4 x 
float> %m 1149} 1150 1151define <4 x float> @test_v4f32_mul_sub_x_negone_y(<4 x float> %x, <4 x float> %y) { 1152; FMA-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: 1153; FMA-INFS: # %bb.0: 1154; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1155; FMA-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 1156; FMA-INFS-NEXT: retq 1157; 1158; FMA4-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: 1159; FMA4-INFS: # %bb.0: 1160; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1161; FMA4-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 1162; FMA4-INFS-NEXT: retq 1163; 1164; AVX512-INFS-LABEL: test_v4f32_mul_sub_x_negone_y: 1165; AVX512-INFS: # %bb.0: 1166; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1167; AVX512-INFS-NEXT: vmulps %xmm1, %xmm0, %xmm0 1168; AVX512-INFS-NEXT: retq 1169; 1170; FMA-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: 1171; FMA-NOINFS: # %bb.0: 1172; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 1173; FMA-NOINFS-NEXT: retq 1174; 1175; FMA4-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: 1176; FMA4-NOINFS: # %bb.0: 1177; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 1178; FMA4-NOINFS-NEXT: retq 1179; 1180; AVX512-NOINFS-LABEL: test_v4f32_mul_sub_x_negone_y: 1181; AVX512-NOINFS: # %bb.0: 1182; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 1183; AVX512-NOINFS-NEXT: retq 1184 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 1185 %m = fmul <4 x float> %s, %y 1186 ret <4 x float> %m 1187} 1188 1189define <4 x float> @test_v4f32_mul_y_sub_x_negone(<4 x float> %x, <4 x float> %y) { 1190; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1191; FMA-INFS: # %bb.0: 1192; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1193; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1194; FMA-INFS-NEXT: retq 1195; 1196; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1197; FMA4-INFS: # %bb.0: 1198; FMA4-INFS-NEXT: vaddps 
{{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1199; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1200; FMA4-INFS-NEXT: retq 1201; 1202; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1203; AVX512-INFS: # %bb.0: 1204; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1205; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1206; AVX512-INFS-NEXT: retq 1207; 1208; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1209; FMA-NOINFS: # %bb.0: 1210; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 1211; FMA-NOINFS-NEXT: retq 1212; 1213; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1214; FMA4-NOINFS: # %bb.0: 1215; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 1216; FMA4-NOINFS-NEXT: retq 1217; 1218; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone: 1219; AVX512-NOINFS: # %bb.0: 1220; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 1221; AVX512-NOINFS-NEXT: retq 1222 %s = fsub <4 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0> 1223 %m = fmul <4 x float> %y, %s 1224 ret <4 x float> %m 1225} 1226 1227define <4 x float> @test_v4f32_mul_y_sub_x_negone_undefs(<4 x float> %x, <4 x float> %y) { 1228; FMA-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: 1229; FMA-INFS: # %bb.0: 1230; FMA-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1231; FMA-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1232; FMA-INFS-NEXT: retq 1233; 1234; FMA4-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: 1235; FMA4-INFS: # %bb.0: 1236; FMA4-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1237; FMA4-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1238; FMA4-INFS-NEXT: retq 1239; 1240; AVX512-INFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: 1241; AVX512-INFS: # %bb.0: 1242; AVX512-INFS-NEXT: vaddps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1243; AVX512-INFS-NEXT: vmulps %xmm0, %xmm1, %xmm0 1244; AVX512-INFS-NEXT: retq 1245; 1246; FMA-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: 
1247; FMA-NOINFS: # %bb.0: 1248; FMA-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 1249; FMA-NOINFS-NEXT: retq 1250; 1251; FMA4-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: 1252; FMA4-NOINFS: # %bb.0: 1253; FMA4-NOINFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm1 1254; FMA4-NOINFS-NEXT: retq 1255; 1256; AVX512-NOINFS-LABEL: test_v4f32_mul_y_sub_x_negone_undefs: 1257; AVX512-NOINFS: # %bb.0: 1258; AVX512-NOINFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm1 1259; AVX512-NOINFS-NEXT: retq 1260 %s = fsub <4 x float> %x, <float undef, float -1.0, float -1.0, float -1.0> 1261 %m = fmul <4 x float> %y, %s 1262 ret <4 x float> %m 1263} 1264 1265; 1266; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y)) 1267; 1268 1269define float @test_f32_interp(float %x, float %y, float %t) { 1270; FMA-INFS-LABEL: test_f32_interp: 1271; FMA-INFS: # %bb.0: 1272; FMA-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 1273; FMA-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 1274; FMA-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 1275; FMA-INFS-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1276; FMA-INFS-NEXT: retq 1277; 1278; FMA4-INFS-LABEL: test_f32_interp: 1279; FMA4-INFS: # %bb.0: 1280; FMA4-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 1281; FMA4-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 1282; FMA4-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 1283; FMA4-INFS-NEXT: vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 1284; FMA4-INFS-NEXT: retq 1285; 1286; AVX512-INFS-LABEL: test_f32_interp: 1287; AVX512-INFS: # %bb.0: 1288; AVX512-INFS-NEXT: vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero 1289; AVX512-INFS-NEXT: vsubss %xmm2, %xmm3, %xmm3 1290; AVX512-INFS-NEXT: vmulss %xmm3, %xmm1, %xmm1 1291; AVX512-INFS-NEXT: vfmadd213ss {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1292; AVX512-INFS-NEXT: retq 1293; 1294; FMA-NOINFS-LABEL: test_f32_interp: 1295; FMA-NOINFS: # %bb.0: 1296; FMA-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1297; FMA-NOINFS-NEXT: 
vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1298; FMA-NOINFS-NEXT: retq 1299; 1300; FMA4-NOINFS-LABEL: test_f32_interp: 1301; FMA4-NOINFS: # %bb.0: 1302; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1303; FMA4-NOINFS-NEXT: vfmsubss {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 1304; FMA4-NOINFS-NEXT: retq 1305; 1306; AVX512-NOINFS-LABEL: test_f32_interp: 1307; AVX512-NOINFS: # %bb.0: 1308; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1309; AVX512-NOINFS-NEXT: vfmsub213ss {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1310; AVX512-NOINFS-NEXT: retq 1311 %t1 = fsub nsz float 1.0, %t 1312 %tx = fmul nsz float %x, %t 1313 %ty = fmul nsz float %y, %t1 1314 %r = fadd nsz float %tx, %ty 1315 ret float %r 1316} 1317 1318define <4 x float> @test_v4f32_interp(<4 x float> %x, <4 x float> %y, <4 x float> %t) { 1319; FMA-INFS-LABEL: test_v4f32_interp: 1320; FMA-INFS: # %bb.0: 1321; FMA-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1322; FMA-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 1323; FMA-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 1324; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1325; FMA-INFS-NEXT: retq 1326; 1327; FMA4-INFS-LABEL: test_v4f32_interp: 1328; FMA4-INFS: # %bb.0: 1329; FMA4-INFS-NEXT: vmovaps {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1330; FMA4-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 1331; FMA4-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 1332; FMA4-INFS-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 1333; FMA4-INFS-NEXT: retq 1334; 1335; AVX512-INFS-LABEL: test_v4f32_interp: 1336; AVX512-INFS: # %bb.0: 1337; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} xmm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1338; AVX512-INFS-NEXT: vsubps %xmm2, %xmm3, %xmm3 1339; AVX512-INFS-NEXT: vmulps %xmm3, %xmm1, %xmm1 1340; AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1341; AVX512-INFS-NEXT: retq 1342; 1343; FMA-NOINFS-LABEL: test_v4f32_interp: 1344; FMA-NOINFS: # %bb.0: 1345; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} 
xmm1 = (xmm2 * xmm1) - xmm1 1346; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1347; FMA-NOINFS-NEXT: retq 1348; 1349; FMA4-NOINFS-LABEL: test_v4f32_interp: 1350; FMA4-NOINFS: # %bb.0: 1351; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1352; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 1353; FMA4-NOINFS-NEXT: retq 1354; 1355; AVX512-NOINFS-LABEL: test_v4f32_interp: 1356; AVX512-NOINFS: # %bb.0: 1357; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1358; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1359; AVX512-NOINFS-NEXT: retq 1360 %t1 = fsub nsz <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %t 1361 %tx = fmul nsz <4 x float> %x, %t 1362 %ty = fmul nsz <4 x float> %y, %t1 1363 %r = fadd nsz <4 x float> %tx, %ty 1364 ret <4 x float> %r 1365} 1366 1367define <8 x float> @test_v8f32_interp(<8 x float> %x, <8 x float> %y, <8 x float> %t) { 1368; FMA-INFS-LABEL: test_v8f32_interp: 1369; FMA-INFS: # %bb.0: 1370; FMA-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1371; FMA-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 1372; FMA-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 1373; FMA-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 1374; FMA-INFS-NEXT: retq 1375; 1376; FMA4-INFS-LABEL: test_v8f32_interp: 1377; FMA4-INFS: # %bb.0: 1378; FMA4-INFS-NEXT: vmovaps {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1379; FMA4-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 1380; FMA4-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 1381; FMA4-INFS-NEXT: vfmaddps {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 1382; FMA4-INFS-NEXT: retq 1383; 1384; AVX512-INFS-LABEL: test_v8f32_interp: 1385; AVX512-INFS: # %bb.0: 1386; AVX512-INFS-NEXT: vbroadcastss {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1387; AVX512-INFS-NEXT: vsubps %ymm2, %ymm3, %ymm3 1388; AVX512-INFS-NEXT: vmulps %ymm3, %ymm1, %ymm1 1389; 
AVX512-INFS-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 1390; AVX512-INFS-NEXT: retq 1391; 1392; FMA-NOINFS-LABEL: test_v8f32_interp: 1393; FMA-NOINFS: # %bb.0: 1394; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 1395; FMA-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 1396; FMA-NOINFS-NEXT: retq 1397; 1398; FMA4-NOINFS-LABEL: test_v8f32_interp: 1399; FMA4-NOINFS: # %bb.0: 1400; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 1401; FMA4-NOINFS-NEXT: vfmsubps {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 1402; FMA4-NOINFS-NEXT: retq 1403; 1404; AVX512-NOINFS-LABEL: test_v8f32_interp: 1405; AVX512-NOINFS: # %bb.0: 1406; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 1407; AVX512-NOINFS-NEXT: vfmsub213ps {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 1408; AVX512-NOINFS-NEXT: retq 1409 %t1 = fsub nsz <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t 1410 %tx = fmul nsz <8 x float> %x, %t 1411 %ty = fmul nsz <8 x float> %y, %t1 1412 %r = fadd nsz <8 x float> %tx, %ty 1413 ret <8 x float> %r 1414} 1415 1416define double @test_f64_interp(double %x, double %y, double %t) { 1417; FMA-INFS-LABEL: test_f64_interp: 1418; FMA-INFS: # %bb.0: 1419; FMA-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 1420; FMA-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 1421; FMA-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 1422; FMA-INFS-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1423; FMA-INFS-NEXT: retq 1424; 1425; FMA4-INFS-LABEL: test_f64_interp: 1426; FMA4-INFS: # %bb.0: 1427; FMA4-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 1428; FMA4-INFS-NEXT: vsubsd %xmm2, %xmm3, %xmm3 1429; FMA4-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 1430; FMA4-INFS-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 1431; FMA4-INFS-NEXT: retq 1432; 1433; AVX512-INFS-LABEL: test_f64_interp: 1434; AVX512-INFS: # %bb.0: 1435; AVX512-INFS-NEXT: vmovsd {{.*#+}} xmm3 = mem[0],zero 1436; AVX512-INFS-NEXT: 
vsubsd %xmm2, %xmm3, %xmm3 1437; AVX512-INFS-NEXT: vmulsd %xmm3, %xmm1, %xmm1 1438; AVX512-INFS-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1439; AVX512-INFS-NEXT: retq 1440; 1441; FMA-NOINFS-LABEL: test_f64_interp: 1442; FMA-NOINFS: # %bb.0: 1443; FMA-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1444; FMA-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1445; FMA-NOINFS-NEXT: retq 1446; 1447; FMA4-NOINFS-LABEL: test_f64_interp: 1448; FMA4-NOINFS: # %bb.0: 1449; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1450; FMA4-NOINFS-NEXT: vfmsubsd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 1451; FMA4-NOINFS-NEXT: retq 1452; 1453; AVX512-NOINFS-LABEL: test_f64_interp: 1454; AVX512-NOINFS: # %bb.0: 1455; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1456; AVX512-NOINFS-NEXT: vfmsub213sd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1457; AVX512-NOINFS-NEXT: retq 1458 %t1 = fsub nsz double 1.0, %t 1459 %tx = fmul nsz double %x, %t 1460 %ty = fmul nsz double %y, %t1 1461 %r = fadd nsz double %tx, %ty 1462 ret double %r 1463} 1464 1465define <2 x double> @test_v2f64_interp(<2 x double> %x, <2 x double> %y, <2 x double> %t) { 1466; FMA-INFS-LABEL: test_v2f64_interp: 1467; FMA-INFS: # %bb.0: 1468; FMA-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0] 1469; FMA-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 1470; FMA-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 1471; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1472; FMA-INFS-NEXT: retq 1473; 1474; FMA4-INFS-LABEL: test_v2f64_interp: 1475; FMA4-INFS: # %bb.0: 1476; FMA4-INFS-NEXT: vmovapd {{.*#+}} xmm3 = [1.0E+0,1.0E+0] 1477; FMA4-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 1478; FMA4-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 1479; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm2) + xmm1 1480; FMA4-INFS-NEXT: retq 1481; 1482; AVX512-INFS-LABEL: test_v2f64_interp: 1483; AVX512-INFS: # %bb.0: 1484; AVX512-INFS-NEXT: vmovddup {{.*#+}} xmm3 = [1.0E+0,1.0E+0] 1485; 
AVX512-INFS-NEXT: # xmm3 = mem[0,0] 1486; AVX512-INFS-NEXT: vsubpd %xmm2, %xmm3, %xmm3 1487; AVX512-INFS-NEXT: vmulpd %xmm3, %xmm1, %xmm1 1488; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm2 * xmm0) + xmm1 1489; AVX512-INFS-NEXT: retq 1490; 1491; FMA-NOINFS-LABEL: test_v2f64_interp: 1492; FMA-NOINFS: # %bb.0: 1493; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1494; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1495; FMA-NOINFS-NEXT: retq 1496; 1497; FMA4-NOINFS-LABEL: test_v2f64_interp: 1498; FMA4-NOINFS: # %bb.0: 1499; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1500; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} xmm0 = (xmm0 * xmm2) - xmm1 1501; FMA4-NOINFS-NEXT: retq 1502; 1503; AVX512-NOINFS-LABEL: test_v2f64_interp: 1504; AVX512-NOINFS: # %bb.0: 1505; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm1 = (xmm2 * xmm1) - xmm1 1506; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} xmm0 = (xmm2 * xmm0) - xmm1 1507; AVX512-NOINFS-NEXT: retq 1508 %t1 = fsub nsz <2 x double> <double 1.0, double 1.0>, %t 1509 %tx = fmul nsz <2 x double> %x, %t 1510 %ty = fmul nsz <2 x double> %y, %t1 1511 %r = fadd nsz <2 x double> %tx, %ty 1512 ret <2 x double> %r 1513} 1514 1515define <4 x double> @test_v4f64_interp(<4 x double> %x, <4 x double> %y, <4 x double> %t) { 1516; FMA-INFS-LABEL: test_v4f64_interp: 1517; FMA-INFS: # %bb.0: 1518; FMA-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1519; FMA-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 1520; FMA-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 1521; FMA-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 1522; FMA-INFS-NEXT: retq 1523; 1524; FMA4-INFS-LABEL: test_v4f64_interp: 1525; FMA4-INFS: # %bb.0: 1526; FMA4-INFS-NEXT: vmovapd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1527; FMA4-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 1528; FMA4-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 1529; FMA4-INFS-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm2) + ymm1 1530; FMA4-INFS-NEXT: retq 1531; 
1532; AVX512-INFS-LABEL: test_v4f64_interp: 1533; AVX512-INFS: # %bb.0: 1534; AVX512-INFS-NEXT: vbroadcastsd {{.*#+}} ymm3 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0] 1535; AVX512-INFS-NEXT: vsubpd %ymm2, %ymm3, %ymm3 1536; AVX512-INFS-NEXT: vmulpd %ymm3, %ymm1, %ymm1 1537; AVX512-INFS-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1 1538; AVX512-INFS-NEXT: retq 1539; 1540; FMA-NOINFS-LABEL: test_v4f64_interp: 1541; FMA-NOINFS: # %bb.0: 1542; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 1543; FMA-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 1544; FMA-NOINFS-NEXT: retq 1545; 1546; FMA4-NOINFS-LABEL: test_v4f64_interp: 1547; FMA4-NOINFS: # %bb.0: 1548; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 1549; FMA4-NOINFS-NEXT: vfmsubpd {{.*#+}} ymm0 = (ymm0 * ymm2) - ymm1 1550; FMA4-NOINFS-NEXT: retq 1551; 1552; AVX512-NOINFS-LABEL: test_v4f64_interp: 1553; AVX512-NOINFS: # %bb.0: 1554; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm1 = (ymm2 * ymm1) - ymm1 1555; AVX512-NOINFS-NEXT: vfmsub213pd {{.*#+}} ymm0 = (ymm2 * ymm0) - ymm1 1556; AVX512-NOINFS-NEXT: retq 1557 %t1 = fsub nsz <4 x double> <double 1.0, double 1.0, double 1.0, double 1.0>, %t 1558 %tx = fmul nsz <4 x double> %x, %t 1559 %ty = fmul nsz <4 x double> %y, %t1 1560 %r = fadd nsz <4 x double> %tx, %ty 1561 ret <4 x double> %r 1562} 1563 1564; 1565; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z) 1566; 1567 1568define <4 x float> @test_v4f32_fneg_fmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 1569; FMA-LABEL: test_v4f32_fneg_fmadd: 1570; FMA: # %bb.0: 1571; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 1572; FMA-NEXT: retq 1573; 1574; FMA4-LABEL: test_v4f32_fneg_fmadd: 1575; FMA4: # %bb.0: 1576; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 1577; FMA4-NEXT: retq 1578; 1579; AVX512-LABEL: test_v4f32_fneg_fmadd: 1580; AVX512: # %bb.0: 1581; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 1582; AVX512-NEXT: 
retq 1583 %mul = fmul nsz <4 x float> %a0, %a1 1584 %add = fadd nsz <4 x float> %mul, %a2 1585 %neg = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add 1586 ret <4 x float> %neg 1587} 1588 1589define <4 x double> @test_v4f64_fneg_fmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1590; FMA-LABEL: test_v4f64_fneg_fmsub: 1591; FMA: # %bb.0: 1592; FMA-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 1593; FMA-NEXT: retq 1594; 1595; FMA4-LABEL: test_v4f64_fneg_fmsub: 1596; FMA4: # %bb.0: 1597; FMA4-NEXT: vfnmaddpd {{.*#+}} ymm0 = -(ymm0 * ymm1) + ymm2 1598; FMA4-NEXT: retq 1599; 1600; AVX512-LABEL: test_v4f64_fneg_fmsub: 1601; AVX512: # %bb.0: 1602; AVX512-NEXT: vfnmadd213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2 1603; AVX512-NEXT: retq 1604 %mul = fmul nsz <4 x double> %a0, %a1 1605 %sub = fsub nsz <4 x double> %mul, %a2 1606 %neg = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub 1607 ret <4 x double> %neg 1608} 1609 1610define <4 x float> @test_v4f32_fneg_fnmadd(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) #0 { 1611; FMA-LABEL: test_v4f32_fneg_fnmadd: 1612; FMA: # %bb.0: 1613; FMA-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 1614; FMA-NEXT: retq 1615; 1616; FMA4-LABEL: test_v4f32_fneg_fnmadd: 1617; FMA4: # %bb.0: 1618; FMA4-NEXT: vfmsubps {{.*#+}} xmm0 = (xmm0 * xmm1) - xmm2 1619; FMA4-NEXT: retq 1620; 1621; AVX512-LABEL: test_v4f32_fneg_fnmadd: 1622; AVX512: # %bb.0: 1623; AVX512-NEXT: vfmsub213ps {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2 1624; AVX512-NEXT: retq 1625 %mul = fmul nsz <4 x float> %a0, %a1 1626 %neg0 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %mul 1627 %add = fadd nsz <4 x float> %neg0, %a2 1628 %neg1 = fsub nsz <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %add 1629 ret <4 x float> %neg1 1630} 1631 1632define <4 x double> @test_v4f64_fneg_fnmsub(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) #0 { 1633; 
FMA-LABEL: test_v4f64_fneg_fnmsub: 1634; FMA: # %bb.0: 1635; FMA-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 1636; FMA-NEXT: retq 1637; 1638; FMA4-LABEL: test_v4f64_fneg_fnmsub: 1639; FMA4: # %bb.0: 1640; FMA4-NEXT: vfmaddpd {{.*#+}} ymm0 = (ymm0 * ymm1) + ymm2 1641; FMA4-NEXT: retq 1642; 1643; AVX512-LABEL: test_v4f64_fneg_fnmsub: 1644; AVX512: # %bb.0: 1645; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2 1646; AVX512-NEXT: retq 1647 %mul = fmul nsz <4 x double> %a0, %a1 1648 %neg0 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %mul 1649 %sub = fsub nsz <4 x double> %neg0, %a2 1650 %neg1 = fsub nsz <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %sub 1651 ret <4 x double> %neg1 1652} 1653 1654; 1655; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) 1656; 1657 1658define <4 x float> @test_v4f32_fma_x_c1_fmul_x_c2(<4 x float> %x) #0 { 1659; FMA-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1660; FMA: # %bb.0: 1661; FMA-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1662; FMA-NEXT: retq 1663; 1664; FMA4-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1665; FMA4: # %bb.0: 1666; FMA4-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0 1667; FMA4-NEXT: retq 1668; 1669; AVX512-LABEL: test_v4f32_fma_x_c1_fmul_x_c2: 1670; AVX512: # %bb.0: 1671; AVX512-NEXT: vmulps {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %xmm0, %xmm0 1672; AVX512-NEXT: retq 1673 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> 1674 %m1 = fmul <4 x float> %x, <float 4.0, float 3.0, float 2.0, float 1.0> 1675 %a = fadd <4 x float> %m0, %m1 1676 ret <4 x float> %a 1677} 1678 1679; 1680; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y) 1681; 1682 1683define <4 x float> @test_v4f32_fma_fmul_x_c1_c2_y(<4 x float> %x, <4 x float> %y) #0 { 1684; FMA-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1685; FMA: # %bb.0: 1686; FMA-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 1687; FMA-NEXT: retq 1688; 1689; 
FMA4-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1690; FMA4: # %bb.0: 1691; FMA4-NEXT: vfmaddps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 1692; FMA4-NEXT: retq 1693; 1694; AVX512-LABEL: test_v4f32_fma_fmul_x_c1_c2_y: 1695; AVX512: # %bb.0: 1696; AVX512-NEXT: vfmadd132ps {{.*#+}} xmm0 = (xmm0 * mem) + xmm1 1697; AVX512-NEXT: retq 1698 %m0 = fmul <4 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0> 1699 %m1 = fmul <4 x float> %m0, <float 4.0, float 3.0, float 2.0, float 1.0> 1700 %a = fadd <4 x float> %m1, %y 1701 ret <4 x float> %a 1702} 1703 1704; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0) 1705 1706define double @test_f64_fneg_fmul(double %x, double %y) #0 { 1707; FMA-LABEL: test_f64_fneg_fmul: 1708; FMA: # %bb.0: 1709; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1710; FMA-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 1711; FMA-NEXT: retq 1712; 1713; FMA4-LABEL: test_f64_fneg_fmul: 1714; FMA4: # %bb.0: 1715; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1716; FMA4-NEXT: vfnmsubsd {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 1717; FMA4-NEXT: retq 1718; 1719; AVX512-LABEL: test_f64_fneg_fmul: 1720; AVX512: # %bb.0: 1721; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1722; AVX512-NEXT: vfnmsub213sd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 1723; AVX512-NEXT: retq 1724 %m = fmul nsz double %x, %y 1725 %n = fsub double -0.0, %m 1726 ret double %n 1727} 1728 1729define <4 x float> @test_v4f32_fneg_fmul(<4 x float> %x, <4 x float> %y) #0 { 1730; FMA-LABEL: test_v4f32_fneg_fmul: 1731; FMA: # %bb.0: 1732; FMA-NEXT: vxorps %xmm2, %xmm2, %xmm2 1733; FMA-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2 1734; FMA-NEXT: retq 1735; 1736; FMA4-LABEL: test_v4f32_fneg_fmul: 1737; FMA4: # %bb.0: 1738; FMA4-NEXT: vxorps %xmm2, %xmm2, %xmm2 1739; FMA4-NEXT: vfnmsubps {{.*#+}} xmm0 = -(xmm0 * xmm1) - xmm2 1740; FMA4-NEXT: retq 1741; 1742; AVX512-LABEL: test_v4f32_fneg_fmul: 1743; AVX512: # %bb.0: 1744; AVX512-NEXT: vxorps %xmm2, %xmm2, %xmm2 1745; AVX512-NEXT: vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * 
xmm0) - xmm2 1746; AVX512-NEXT: retq 1747 %m = fmul nsz <4 x float> %x, %y 1748 %n = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %m 1749 ret <4 x float> %n 1750} 1751 1752define <4 x double> @test_v4f64_fneg_fmul(<4 x double> %x, <4 x double> %y) #0 { 1753; FMA-LABEL: test_v4f64_fneg_fmul: 1754; FMA: # %bb.0: 1755; FMA-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1756; FMA-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 1757; FMA-NEXT: retq 1758; 1759; FMA4-LABEL: test_v4f64_fneg_fmul: 1760; FMA4: # %bb.0: 1761; FMA4-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1762; FMA4-NEXT: vfnmsubpd {{.*#+}} ymm0 = -(ymm0 * ymm1) - ymm2 1763; FMA4-NEXT: retq 1764; 1765; AVX512-LABEL: test_v4f64_fneg_fmul: 1766; AVX512: # %bb.0: 1767; AVX512-NEXT: vxorpd %xmm2, %xmm2, %xmm2 1768; AVX512-NEXT: vfnmsub213pd {{.*#+}} ymm0 = -(ymm1 * ymm0) - ymm2 1769; AVX512-NEXT: retq 1770 %m = fmul nsz <4 x double> %x, %y 1771 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1772 ret <4 x double> %n 1773} 1774 1775define <4 x double> @test_v4f64_fneg_fmul_no_nsz(<4 x double> %x, <4 x double> %y) #0 { 1776; FMA-LABEL: test_v4f64_fneg_fmul_no_nsz: 1777; FMA: # %bb.0: 1778; FMA-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1779; FMA-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1780; FMA-NEXT: retq 1781; 1782; FMA4-LABEL: test_v4f64_fneg_fmul_no_nsz: 1783; FMA4: # %bb.0: 1784; FMA4-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1785; FMA4-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm0, %ymm0 1786; FMA4-NEXT: retq 1787; 1788; AVX512-LABEL: test_v4f64_fneg_fmul_no_nsz: 1789; AVX512: # %bb.0: 1790; AVX512-NEXT: vmulpd %ymm1, %ymm0, %ymm0 1791; AVX512-NEXT: vxorpd {{\.?LCPI[0-9]+_[0-9]+}}(%rip){1to4}, %ymm0, %ymm0 1792; AVX512-NEXT: retq 1793 %m = fmul <4 x double> %x, %y 1794 %n = fsub <4 x double> <double -0.0, double -0.0, double -0.0, double -0.0>, %m 1795 ret <4 x double> %n 1796} 1797 1798; ((a*b) + (c*d)) + n1 --> (a*b) + ((c*d) + n1) 1799 1800define double 
@fadd_fma_fmul_1(double %a, double %b, double %c, double %d, double %n1) nounwind { 1801; FMA-LABEL: fadd_fma_fmul_1: 1802; FMA: # %bb.0: 1803; FMA-NEXT: vfmadd213sd {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4 1804; FMA-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 1805; FMA-NEXT: retq 1806; 1807; FMA4-LABEL: fadd_fma_fmul_1: 1808; FMA4: # %bb.0: 1809; FMA4-NEXT: vfmaddsd {{.*#+}} xmm2 = (xmm2 * xmm3) + xmm4 1810; FMA4-NEXT: vfmaddsd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2 1811; FMA4-NEXT: retq 1812; 1813; AVX512-LABEL: fadd_fma_fmul_1: 1814; AVX512: # %bb.0: 1815; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4 1816; AVX512-NEXT: vfmadd213sd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2 1817; AVX512-NEXT: retq 1818 %m1 = fmul fast double %a, %b 1819 %m2 = fmul fast double %c, %d 1820 %a1 = fadd fast double %m1, %m2 1821 %a2 = fadd fast double %a1, %n1 1822 ret double %a2 1823} 1824 1825; Minimum FMF - the 1st fadd is contracted because that combines 1826; fmul+fadd as specified by the order of operations; the 2nd fadd 1827; requires reassociation to fuse with c*d. 

; Flags under test: both fmuls carry no fast-math flags, the first fadd has
; only 'contract' and the second fadd has only 'reassoc'. Every run line
; expects two chained fused multiply-adds and no standalone multiply or add.
define float @fadd_fma_fmul_fmf(float %a, float %b, float %c, float %d, float %n0) nounwind {
; FMA-LABEL: fadd_fma_fmul_fmf:
; FMA:       # %bb.0:
; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4
; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    retq
;
; FMA4-LABEL: fadd_fma_fmul_fmf:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vfmaddss {{.*#+}} xmm2 = (xmm2 * xmm3) + xmm4
; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    retq
;
; AVX512-LABEL: fadd_fma_fmul_fmf:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm4
; AVX512-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    retq
  %m1 = fmul float %a, %b
  %m2 = fmul float %c, %d
  %a1 = fadd contract float %m1, %m2
  %a2 = fadd reassoc float %n0, %a1
  ret float %a2
}

; Not minimum FMF: both fadds carry only 'contract', so the second fadd has no
; 'reassoc' flag. The expected output keeps a standalone vmulss, fuses only one
; multiply into the first add, and leaves the add of %n0 as a separate vaddss.

define float @fadd_fma_fmul_2(float %a, float %b, float %c, float %d, float %n0) nounwind {
; FMA-LABEL: fadd_fma_fmul_2:
; FMA:       # %bb.0:
; FMA-NEXT:    vmulss %xmm3, %xmm2, %xmm2
; FMA-NEXT:    vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    vaddss %xmm2, %xmm4, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: fadd_fma_fmul_2:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmulss %xmm3, %xmm2, %xmm2
; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    vaddss %xmm0, %xmm4, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: fadd_fma_fmul_2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulss %xmm3, %xmm2, %xmm2
; AVX512-NEXT:    vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    vaddss %xmm2, %xmm4, %xmm0
; AVX512-NEXT:    retq
  %m1 = fmul float %a, %b
  %m2 = fmul float %c, %d
  %a1 = fadd contract float %m1, %m2
  %a2 = fadd contract float %n0, %a1
  ret float %a2
}

; The final fadd can be folded with either 1 of the leading fmuls.

; Four 'fast' fmuls are summed by a tree of three 'fast' fadds. Every run line
; expects a single vmulpd followed by three chained fused multiply-adds.
define <2 x double> @fadd_fma_fmul_3(<2 x double> %x1, <2 x double> %x2, <2 x double> %x3, <2 x double> %x4, <2 x double> %x5, <2 x double> %x6, <2 x double> %x7, <2 x double> %x8) nounwind {
; FMA-LABEL: fadd_fma_fmul_3:
; FMA:       # %bb.0:
; FMA-NEXT:    vmulpd %xmm3, %xmm2, %xmm2
; FMA-NEXT:    vfmadd231pd {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    vfmadd231pd {{.*#+}} xmm2 = (xmm7 * xmm6) + xmm2
; FMA-NEXT:    vfmadd231pd {{.*#+}} xmm2 = (xmm5 * xmm4) + xmm2
; FMA-NEXT:    vmovapd %xmm2, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: fadd_fma_fmul_3:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmulpd %xmm3, %xmm2, %xmm2
; FMA4-NEXT:    vfmaddpd {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    vfmaddpd {{.*#+}} xmm0 = (xmm6 * xmm7) + xmm0
; FMA4-NEXT:    vfmaddpd {{.*#+}} xmm0 = (xmm4 * xmm5) + xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: fadd_fma_fmul_3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulpd %xmm3, %xmm2, %xmm2
; AVX512-NEXT:    vfmadd231pd {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    vfmadd231pd {{.*#+}} xmm2 = (xmm7 * xmm6) + xmm2
; AVX512-NEXT:    vfmadd231pd {{.*#+}} xmm2 = (xmm5 * xmm4) + xmm2
; AVX512-NEXT:    vmovapd %xmm2, %xmm0
; AVX512-NEXT:    retq
  %m1 = fmul fast <2 x double> %x1, %x2
  %m2 = fmul fast <2 x double> %x3, %x4
  %m3 = fmul fast <2 x double> %x5, %x6
  %m4 = fmul fast <2 x double> %x7, %x8
  %a1 = fadd fast <2 x double> %m1, %m2
  %a2 = fadd fast <2 x double> %m3, %m4
  %a3 = fadd fast <2 x double> %a1, %a2
  ret <2 x double> %a3
}

; negative test: the first product (%m1) has an extra use because it is also
; stored through %p. The expected output keeps a standalone vmulss feeding the
; store, one fused multiply-add, and a separate vaddss for %n0.

define float @fadd_fma_fmul_extra_use_1(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind {
; FMA-LABEL: fadd_fma_fmul_extra_use_1:
; FMA:       # %bb.0:
; FMA-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; FMA-NEXT:    vmovss %xmm0, (%rdi)
; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm0
; FMA-NEXT:    vaddss %xmm2, %xmm4, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: fadd_fma_fmul_extra_use_1:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; FMA4-NEXT:    vmovss %xmm0, (%rdi)
; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm2 * xmm3) + xmm0
; FMA4-NEXT:    vaddss %xmm0, %xmm4, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: fadd_fma_fmul_extra_use_1:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX512-NEXT:    vmovss %xmm0, (%rdi)
; AVX512-NEXT:    vfmadd213ss {{.*#+}} xmm2 = (xmm3 * xmm2) + xmm0
; AVX512-NEXT:    vaddss %xmm2, %xmm4, %xmm0
; AVX512-NEXT:    retq
  %m1 = fmul fast float %a, %b
  store float %m1, ptr %p
  %m2 = fmul fast float %c, %d
  %a1 = fadd fast float %m1, %m2
  %a2 = fadd fast float %n0, %a1
  ret float %a2
}

; negative test: the second product (%m2) has an extra use because it is also
; stored through %p. The expected output keeps a standalone vmulss feeding the
; store, one fused multiply-add, and a separate vaddss for %n0.

define float @fadd_fma_fmul_extra_use_2(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind {
; FMA-LABEL: fadd_fma_fmul_extra_use_2:
; FMA:       # %bb.0:
; FMA-NEXT:    vmulss %xmm3, %xmm2, %xmm2
; FMA-NEXT:    vmovss %xmm2, (%rdi)
; FMA-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    vaddss %xmm0, %xmm4, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: fadd_fma_fmul_extra_use_2:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmulss %xmm3, %xmm2, %xmm2
; FMA4-NEXT:    vmovss %xmm2, (%rdi)
; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    vaddss %xmm0, %xmm4, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: fadd_fma_fmul_extra_use_2:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulss %xmm3, %xmm2, %xmm2
; AVX512-NEXT:    vmovss %xmm2, (%rdi)
; AVX512-NEXT:    vfmadd213ss {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    vaddss %xmm0, %xmm4, %xmm0
; AVX512-NEXT:    retq
  %m1 = fmul fast float %a, %b
  %m2 = fmul fast float %c, %d
  store float %m2, ptr %p
  %a1 = fadd fast float %m1, %m2
  %a2 = fadd fast float %n0, %a1
  ret float %a2
}

; negative test

; Here the intermediate sum (%a1) has an extra use because it is also stored
; through %p. The expected output keeps a standalone vmulss, one fused
; multiply-add whose result is stored, and a separate vaddss for %n0.
define float @fadd_fma_fmul_extra_use_3(float %a, float %b, float %c, float %d, float %n0, ptr %p) nounwind {
; FMA-LABEL: fadd_fma_fmul_extra_use_3:
; FMA:       # %bb.0:
; FMA-NEXT:    vmulss %xmm3, %xmm2, %xmm2
; FMA-NEXT:    vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2
; FMA-NEXT:    vmovss %xmm2, (%rdi)
; FMA-NEXT:    vaddss %xmm2, %xmm4, %xmm0
; FMA-NEXT:    retq
;
; FMA4-LABEL: fadd_fma_fmul_extra_use_3:
; FMA4:       # %bb.0:
; FMA4-NEXT:    vmulss %xmm3, %xmm2, %xmm2
; FMA4-NEXT:    vfmaddss {{.*#+}} xmm0 = (xmm0 * xmm1) + xmm2
; FMA4-NEXT:    vmovss %xmm0, (%rdi)
; FMA4-NEXT:    vaddss %xmm0, %xmm4, %xmm0
; FMA4-NEXT:    retq
;
; AVX512-LABEL: fadd_fma_fmul_extra_use_3:
; AVX512:       # %bb.0:
; AVX512-NEXT:    vmulss %xmm3, %xmm2, %xmm2
; AVX512-NEXT:    vfmadd231ss {{.*#+}} xmm2 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT:    vmovss %xmm2, (%rdi)
; AVX512-NEXT:    vaddss %xmm2, %xmm4, %xmm0
; AVX512-NEXT:    retq
  %m1 = fmul fast float %a, %b
  %m2 = fmul fast float %c, %d
  %a1 = fadd fast float %m1, %m2
  store float %a1, ptr %p
  %a2 = fadd fast float %n0, %a1
  ret float %a2
}

; Attribute group #0 sets "unsafe-fp-math"="true" on every function in this
; file that is declared with #0 (for example, the fneg-of-fmul tests).
attributes #0 = { "unsafe-fp-math"="true" }