; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA --check-prefix=FMA-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-INFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA --check-prefix=FMA-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4,+fma -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx,+fma4 -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=FMA4 --check-prefix=FMA4-NOINFS
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512dq -fp-contract=fast -enable-no-infs-fp-math | FileCheck %s --check-prefix=AVX512 --check-prefix=AVX512-NOINFS

;
; Checks that fmul/fadd/fsub chains on 512-bit vectors (<16 x float> and
; <8 x double>) are contracted into fused multiply-add instructions when
; llc is invoked with -fp-contract=fast. In the +fma and +fma4 runs every
; operation is expected as two ymm halves; in the +avx512dq runs a single zmm
; instruction is expected. The second group of four llc invocations adds
; -enable-no-infs-fp-math; that flag is the only difference between the
; *-INFS and *-NOINFS check prefixes.
;

;
; Pattern: (fadd (fmul x, y), z) -> (fmadd x,y,z)
;

define <16 x float> @test_16f32_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fadd <16 x float> %x, %a2
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fadd <8 x double> %x, %a2
  ret <8 x double> %res
}

;
; Pattern: (fsub (fmul x, y), z) -> (fmsub x, y, z)
;

define <16 x float> @test_16f32_fmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %x, %a2
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fsub <8 x double> %x, %a2
  ret <8 x double> %res
}

;
; Pattern: (fsub z, (fmul x, y)) -> (fnmadd x, y, z)
;

define <16 x float> @test_16f32_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  %res = fsub <16 x float> %a2, %x
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fnmadd(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  %res = fsub <8 x double> %a2, %x
  ret <8 x double> %res
}

;
; Pattern: (fsub (fneg (fmul x, y)), z) -> (fnmsub x, y, z)
;

define <16 x float> @test_16f32_fnmsub(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <16 x float> %a0, %a1
  ; The fneg in the pattern is spelled as (fsub -0.0, %x).
  %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %x
  %res = fsub <16 x float> %y, %a2
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = fmul <8 x double> %a0, %a1
  ; The fneg in the pattern is spelled as (fsub -0.0, %x).
  %y = fsub <8 x double> <double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00, double -0.000000e+00>, %x
  %res = fsub <8 x double> %y, %a2
  ret <8 x double> %res
}

;
; Load Folding Patterns
;

define <16 x float> @test_16f32_fmadd_load(<16 x float>* %a0, <16 x float> %a1, <16 x float> %a2) {
; FMA-LABEL: test_16f32_fmadd_load:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd132ps (%rdi), %ymm2, %ymm0
; FMA-NEXT:    vfmadd132ps 32(%rdi), %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_16f32_fmadd_load:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, (%rdi), %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, 32(%rdi), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_16f32_fmadd_load:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd132ps (%rdi), %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = load <16 x float>, <16 x float>* %a0
  %y = fmul <16 x float> %x, %a1
  %res = fadd <16 x float> %y, %a2
  ret <16 x float> %res
}

define <8 x double> @test_8f64_fmsub_load(<8 x double>* %a0, <8 x double> %a1, <8 x double> %a2) {
; FMA-LABEL: test_8f64_fmsub_load:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub132pd (%rdi), %ymm2, %ymm0
; FMA-NEXT:    vfmsub132pd 32(%rdi), %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_8f64_fmsub_load:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubpd %ymm2, (%rdi), %ymm0, %ymm0
; FMA4-NEXT:    vfmsubpd %ymm3, 32(%rdi), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_8f64_fmsub_load:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub132pd (%rdi), %zmm1, %zmm0
; AVX512-NEXT:    retq
  %x = load <8 x double>, <8 x double>* %a0
  %y = fmul <8 x double> %x, %a1
  %res = fsub <8 x double> %y, %a2
  ret <8 x double> %res
}

;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;
; In the functions below the *-INFS expectations keep a separate add/sub
; followed by a multiply, while the *-NOINFS expectations (runs with
; -enable-no-infs-fp-math) show a single fused instruction that uses y as both
; a multiplicand and the addend.
;

define <16 x float> @test_v16f32_mul_add_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vaddps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vaddps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vaddps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vaddps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_add_x_one_y:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_one_y:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_add_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vaddpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vaddpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vaddpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vaddpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_add_x_one:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_one:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_add_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA-INFS-NEXT:    vaddps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vaddps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA4-INFS-NEXT:    vaddps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vaddps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_add_x_negone_y:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vaddps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_add_x_negone_y:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %a, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_add_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA-INFS-NEXT:    vaddpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vaddpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA4-INFS-NEXT:    vaddpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vaddpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_add_x_negone:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vaddpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_add_x_negone:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %a = fadd <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %a
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_sub_one_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vsubps %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT:    vsubps %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vsubps %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT:    vsubps %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_one_x_y:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*}}(%rip), %zmm2
; AVX512-INFS-NEXT:    vsubps %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfnmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfnmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfnmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfnmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_one_x_y:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfnmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_sub_one_x(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vsubpd %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT:    vsubpd %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vsubpd %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT:    vsubpd %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_one_x:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm2
; AVX512-INFS-NEXT:    vsubpd %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfnmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfnmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfnmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfnmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_one_x:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfnmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_sub_negone_x_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA-INFS-NEXT:    vsubps %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT:    vsubps %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA4-INFS-NEXT:    vsubps %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT:    vsubps %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*}}(%rip), %zmm2
; AVX512-INFS-NEXT:    vsubps %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfnmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfnmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfnmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfnmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_negone_x_y:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfnmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <16 x float> <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>, %x
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_sub_negone_x(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA-INFS-NEXT:    vsubpd %ymm1, %ymm4, %ymm1
; FMA-INFS-NEXT:    vsubpd %ymm0, %ymm4, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA4-INFS-NEXT:    vsubpd %ymm1, %ymm4, %ymm1
; FMA4-INFS-NEXT:    vsubpd %ymm0, %ymm4, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm2
; AVX512-INFS-NEXT:    vsubpd %zmm0, %zmm2, %zmm0
; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfnmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfnmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfnmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfnmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_negone_x:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfnmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <8 x double> <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>, %x
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_sub_x_one_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vsubps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vsubps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vsubps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vsubps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_one_y:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmsub213ps %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmsub213ps %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmsubps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmsubps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_one_y:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmsub213ps %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <16 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_sub_x_one(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vsubpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vsubpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vsubpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vsubpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_one:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmsub213pd %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmsub213pd %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmsubpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmsubpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_one:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmsub213pd %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <8 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

define <16 x float> @test_v16f32_mul_sub_x_negone_y(<16 x float> %x, <16 x float> %y) {
; FMA-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA-INFS-NEXT:    vsubps %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vsubps %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA4-INFS-NEXT:    vsubps %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vsubps %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm2, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulps %ymm3, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vsubps {{.*}}(%rip){1to16}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulps %zmm1, %zmm0, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmadd213ps %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmadd213ps %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmaddps %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmaddps %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_mul_sub_x_negone_y:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmadd213ps %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <16 x float> %x, <float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0, float -1.0>
  %m = fmul <16 x float> %s, %y
  ret <16 x float> %m
}

define <8 x double> @test_v8f64_mul_y_sub_x_negone(<8 x double> %x, <8 x double> %y) {
; FMA-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA-INFS-NEXT:    vsubpd %ymm4, %ymm1, %ymm1
; FMA-INFS-NEXT:    vsubpd %ymm4, %ymm0, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm4 = [-1.000000e+00,-1.000000e+00,-1.000000e+00,-1.000000e+00]
; FMA4-INFS-NEXT:    vsubpd %ymm4, %ymm1, %ymm1
; FMA4-INFS-NEXT:    vsubpd %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm0, %ymm2, %ymm0
; FMA4-INFS-NEXT:    vmulpd %ymm1, %ymm3, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-INFS-NEXT:    vmulpd %zmm0, %zmm1, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfmadd213pd %ymm2, %ymm2, %ymm0
; FMA-NOINFS-NEXT:    vfmadd213pd %ymm3, %ymm3, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfmaddpd %ymm2, %ymm2, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmaddpd %ymm3, %ymm3, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_mul_y_sub_x_negone:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfmadd213pd %zmm1, %zmm1, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %s = fsub <8 x double> %x, <double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0, double -1.0>
  %m = fmul <8 x double> %y, %s
  ret <8 x double> %m
}

;
; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
;

; Linear interpolation on <16 x float>: r = x*t + y*(1.0 - t).
; The INFS check prefixes expect the (1.0 - t) subtract and its multiply to stay
; separate, with only the final add fused. The NOINFS check prefixes (the runs
; that pass -enable-no-infs-fp-math) expect y*(1.0 - t) to become an fnmadd too.
define <16 x float> @test_v16f32_interp(<16 x float> %x, <16 x float> %y, <16 x float> %t) {
; FMA-INFS-LABEL: test_v16f32_interp:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovaps {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vsubps %ymm4, %ymm6, %ymm7
; FMA-INFS-NEXT:    vsubps %ymm5, %ymm6, %ymm6
; FMA-INFS-NEXT:    vmulps %ymm6, %ymm3, %ymm3
; FMA-INFS-NEXT:    vmulps %ymm7, %ymm2, %ymm2
; FMA-INFS-NEXT:    vfmadd213ps %ymm2, %ymm4, %ymm0
; FMA-INFS-NEXT:    vfmadd213ps %ymm3, %ymm5, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v16f32_interp:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovaps {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vsubps %ymm4, %ymm6, %ymm7
; FMA4-INFS-NEXT:    vsubps %ymm5, %ymm6, %ymm6
; FMA4-INFS-NEXT:    vmulps %ymm6, %ymm3, %ymm3
; FMA4-INFS-NEXT:    vmulps %ymm7, %ymm2, %ymm2
; FMA4-INFS-NEXT:    vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v16f32_interp:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vbroadcastss {{.*}}(%rip), %zmm3
; AVX512-INFS-NEXT:    vsubps %zmm2, %zmm3, %zmm3
; AVX512-INFS-NEXT:    vmulps %zmm3, %zmm1, %zmm1
; AVX512-INFS-NEXT:    vfmadd213ps %zmm1, %zmm2, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v16f32_interp:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfnmadd213ps %ymm3, %ymm5, %ymm3
; FMA-NOINFS-NEXT:    vfnmadd213ps %ymm2, %ymm4, %ymm2
; FMA-NOINFS-NEXT:    vfmadd213ps %ymm2, %ymm4, %ymm0
; FMA-NOINFS-NEXT:    vfmadd213ps %ymm3, %ymm5, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v16f32_interp:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfnmaddps %ymm3, %ymm3, %ymm5, %ymm3
; FMA4-NOINFS-NEXT:    vfnmaddps %ymm2, %ymm2, %ymm4, %ymm2
; FMA4-NOINFS-NEXT:    vfmaddps %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmaddps %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v16f32_interp:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfnmadd213ps %zmm1, %zmm2, %zmm1
; AVX512-NOINFS-NEXT:    vfmadd213ps %zmm1, %zmm2, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %t1 = fsub <16 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %t
  %tx = fmul <16 x float> %x, %t
  %ty = fmul <16 x float> %y, %t1
  %r = fadd <16 x float> %tx, %ty
  ret <16 x float> %r
}

; <8 x double> variant of the interpolation test: r = x*t + y*(1.0 - t).
define <8 x double> @test_v8f64_interp(<8 x double> %x, <8 x double> %y, <8 x double> %t) {
; FMA-INFS-LABEL: test_v8f64_interp:
; FMA-INFS:       # BB#0:
; FMA-INFS-NEXT:    vmovapd {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA-INFS-NEXT:    vsubpd %ymm4, %ymm6, %ymm7
; FMA-INFS-NEXT:    vsubpd %ymm5, %ymm6, %ymm6
; FMA-INFS-NEXT:    vmulpd %ymm6, %ymm3, %ymm3
; FMA-INFS-NEXT:    vmulpd %ymm7, %ymm2, %ymm2
; FMA-INFS-NEXT:    vfmadd213pd %ymm2, %ymm4, %ymm0
; FMA-INFS-NEXT:    vfmadd213pd %ymm3, %ymm5, %ymm1
; FMA-INFS-NEXT:    retq
;
; FMA4-INFS-LABEL: test_v8f64_interp:
; FMA4-INFS:       # BB#0:
; FMA4-INFS-NEXT:    vmovapd {{.*#+}} ymm6 = [1.000000e+00,1.000000e+00,1.000000e+00,1.000000e+00]
; FMA4-INFS-NEXT:    vsubpd %ymm4, %ymm6, %ymm7
; FMA4-INFS-NEXT:    vsubpd %ymm5, %ymm6, %ymm6
; FMA4-INFS-NEXT:    vmulpd %ymm6, %ymm3, %ymm3
; FMA4-INFS-NEXT:    vmulpd %ymm7, %ymm2, %ymm2
; FMA4-INFS-NEXT:    vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-INFS-NEXT:    vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-INFS-NEXT:    retq
;
; AVX512-INFS-LABEL: test_v8f64_interp:
; AVX512-INFS:       # BB#0:
; AVX512-INFS-NEXT:    vbroadcastsd {{.*}}(%rip), %zmm3
; AVX512-INFS-NEXT:    vsubpd %zmm2, %zmm3, %zmm3
; AVX512-INFS-NEXT:    vmulpd %zmm3, %zmm1, %zmm1
; AVX512-INFS-NEXT:    vfmadd213pd %zmm1, %zmm2, %zmm0
; AVX512-INFS-NEXT:    retq
;
; FMA-NOINFS-LABEL: test_v8f64_interp:
; FMA-NOINFS:       # BB#0:
; FMA-NOINFS-NEXT:    vfnmadd213pd %ymm3, %ymm5, %ymm3
; FMA-NOINFS-NEXT:    vfnmadd213pd %ymm2, %ymm4, %ymm2
; FMA-NOINFS-NEXT:    vfmadd213pd %ymm2, %ymm4, %ymm0
; FMA-NOINFS-NEXT:    vfmadd213pd %ymm3, %ymm5, %ymm1
; FMA-NOINFS-NEXT:    retq
;
; FMA4-NOINFS-LABEL: test_v8f64_interp:
; FMA4-NOINFS:       # BB#0:
; FMA4-NOINFS-NEXT:    vfnmaddpd %ymm3, %ymm3, %ymm5, %ymm3
; FMA4-NOINFS-NEXT:    vfnmaddpd %ymm2, %ymm2, %ymm4, %ymm2
; FMA4-NOINFS-NEXT:    vfmaddpd %ymm2, %ymm4, %ymm0, %ymm0
; FMA4-NOINFS-NEXT:    vfmaddpd %ymm3, %ymm5, %ymm1, %ymm1
; FMA4-NOINFS-NEXT:    retq
;
; AVX512-NOINFS-LABEL: test_v8f64_interp:
; AVX512-NOINFS:       # BB#0:
; AVX512-NOINFS-NEXT:    vfnmadd213pd %zmm1, %zmm2, %zmm1
; AVX512-NOINFS-NEXT:    vfmadd213pd %zmm1, %zmm2, %zmm0
; AVX512-NOINFS-NEXT:    retq
  %t1 = fsub <8 x double> <double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0, double 1.0>, %t
  %tx = fmul <8 x double> %x, %t
  %ty = fmul <8 x double> %y, %t1
  %r = fadd <8 x double> %tx, %ty
  ret <8 x double> %r
}

;
; Pattern: (fneg (fma x, y, z)) -> (fma x, -y, -z)
;

; Negation is written as (fsub -0.0, v) throughout this group.
; -(a0*a1 + a2): the checks expect a single fnmsub per register.
define <16 x float> @test_v16f32_fneg_fmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <16 x float> %a0, %a1
  %add = fadd <16 x float> %mul, %a2
  %neg = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg
}

; -(a0*a1 - a2): the checks expect a single fnmadd per register.
define <8 x double> @test_v8f64_fneg_fmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfnmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfnmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfnmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <8 x double> %a0, %a1
  %sub = fsub <8 x double> %mul, %a2
  %neg = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg
}

; -(-(a0*a1) + a2): the checks expect a single fmsub per register.
define <16 x float> @test_v16f32_fneg_fnmadd(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) #0 {
; FMA-LABEL: test_v16f32_fneg_fnmadd:
; FMA:       # BB#0:
; FMA-NEXT:    vfmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmsub213ps %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fnmadd:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmsubps %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fnmadd:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <16 x float> %a0, %a1
  %neg0 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %mul
  %add = fadd <16 x float> %neg0, %a2
  %neg1 = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %add
  ret <16 x float> %neg1
}

; -(-(a0*a1) - a2): the checks expect a single fmadd per register.
define <8 x double> @test_v8f64_fneg_fnmsub(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) #0 {
; FMA-LABEL: test_v8f64_fneg_fnmsub:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfmadd213pd %ymm5, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fnmsub:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfmaddpd %ymm5, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fnmsub:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %mul = fmul <8 x double> %a0, %a1
  %neg0 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %mul
  %sub = fsub <8 x double> %neg0, %a2
  %neg1 = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %sub
  ret <8 x double> %neg1
}

;
; Pattern: (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
;

; x*c1 + x*c2 with two constant vectors: the checks expect one multiply by a
; constant-pool operand per register and no separate add.
define <16 x float> @test_v16f32_fma_x_c1_fmul_x_c2(<16 x float> %x) #0 {
; FMA-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA:       # BB#0:
; FMA-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; FMA4:       # BB#0:
; FMA4-NEXT:    vmulps {{.*}}(%rip), %ymm0, %ymm0
; FMA4-NEXT:    vmulps {{.*}}(%rip), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fma_x_c1_fmul_x_c2:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmulps {{.*}}(%rip), %zmm0, %zmm0
; AVX512-NEXT:    retq
  %m0 = fmul <16 x float> %x, <float 17.0, float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0>
  %m1 = fmul <16 x float> %x, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <16 x float> %m0, %m1
  ret <16 x float> %a
}

;
; Pattern: (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
;

; (x*c1)*c2 + y with two constant vectors: the checks expect one fmadd per
; register taking a single constant-pool operand.
define <16 x float> @test_v16f32_fma_fmul_x_c1_c2_y(<16 x float> %x, <16 x float> %y) #0 {
; FMA-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA:       # BB#0:
; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %ymm2, %ymm0
; FMA-NEXT:    vfmadd132ps {{.*}}(%rip), %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; FMA4:       # BB#0:
; FMA4-NEXT:    vfmaddps %ymm2, {{.*}}(%rip), %ymm0, %ymm0
; FMA4-NEXT:    vfmaddps %ymm3, {{.*}}(%rip), %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fma_fmul_x_c1_c2_y:
; AVX512:       # BB#0:
; AVX512-NEXT:    vfmadd132ps {{.*}}(%rip), %zmm1, %zmm0
; AVX512-NEXT:    retq
  %m0 = fmul <16 x float> %x, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>
  %m1 = fmul <16 x float> %m0, <float 16.0, float 15.0, float 14.0, float 13.0, float 12.0, float 11.0, float 10.0, float 9.0, float 8.0, float 7.0, float 6.0, float 5.0, float 4.0, float 3.0, float 2.0, float 1.0>
  %a = fadd <16 x float> %m1, %y
  ret <16 x float> %a
}

;
; Pattern: (fneg (fmul x, y)) -> (fnmsub x, y, 0)
;

; The fmul carries the nsz flag; the checks expect a zeroed register followed by
; one fnmsub per register.
define <16 x float> @test_v16f32_fneg_fmul(<16 x float> %x, <16 x float> %y) #0 {
; FMA-LABEL: test_v16f32_fneg_fmul:
; FMA:       # BB#0:
; FMA-NEXT:    vxorps %ymm4, %ymm4, %ymm4
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213ps %ymm4, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v16f32_fneg_fmul:
; FMA4:       # BB#0:
; FMA4-NEXT:    vxorps %ymm4, %ymm4, %ymm4
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubps %ymm4, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v16f32_fneg_fmul:
; AVX512:       # BB#0:
; AVX512-NEXT:    vxorps %zmm2, %zmm2, %zmm2
; AVX512-NEXT:    vfnmsub213ps %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %m = fmul nsz <16 x float> %x, %y
  %n = fsub <16 x float> <float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0, float -0.0>, %m
  ret <16 x float> %n
}

; <8 x double> variant of the nsz fneg(fmul) test.
define <8 x double> @test_v8f64_fneg_fmul(<8 x double> %x, <8 x double> %y) #0 {
; FMA-LABEL: test_v8f64_fneg_fmul:
; FMA:       # BB#0:
; FMA-NEXT:    vxorpd %ymm4, %ymm4, %ymm4
; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm2, %ymm0
; FMA-NEXT:    vfnmsub213pd %ymm4, %ymm3, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fmul:
; FMA4:       # BB#0:
; FMA4-NEXT:    vxorpd %ymm4, %ymm4, %ymm4
; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vfnmsubpd %ymm4, %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fmul:
; AVX512:       # BB#0:
; AVX512-NEXT:    vxorpd %zmm2, %zmm2, %zmm2
; AVX512-NEXT:    vfnmsub213pd %zmm2, %zmm1, %zmm0
; AVX512-NEXT:    retq
  %m = fmul nsz <8 x double> %x, %y
  %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
  ret <8 x double> %n
}

; Same computation as test_v8f64_fneg_fmul, but the fmul has no nsz flag. The
; checks expect a plain multiply followed by an xor with a -0.0 constant, with
; no fnmsub formed.
define <8 x double> @test_v8f64_fneg_fmul_no_nsz(<8 x double> %x, <8 x double> %y) #0 {
; FMA-LABEL: test_v8f64_fneg_fmul_no_nsz:
; FMA:       # BB#0:
; FMA-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; FMA-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; FMA-NEXT:    vmovapd {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; FMA-NEXT:    vxorpd %ymm2, %ymm0, %ymm0
; FMA-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
; FMA-NEXT:    retq
;
; FMA4-LABEL: test_v8f64_fneg_fmul_no_nsz:
; FMA4:       # BB#0:
; FMA4-NEXT:    vmulpd %ymm3, %ymm1, %ymm1
; FMA4-NEXT:    vmulpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vmovapd {{.*#+}} ymm2 = [-0.000000e+00,-0.000000e+00,-0.000000e+00,-0.000000e+00]
; FMA4-NEXT:    vxorpd %ymm2, %ymm0, %ymm0
; FMA4-NEXT:    vxorpd %ymm2, %ymm1, %ymm1
; FMA4-NEXT:    retq
;
; AVX512-LABEL: test_v8f64_fneg_fmul_no_nsz:
; AVX512:       # BB#0:
; AVX512-NEXT:    vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT:    vxorpd {{.*}}(%rip){1to8}, %zmm0, %zmm0
; AVX512-NEXT:    retq
  %m = fmul <8 x double> %x, %y
  %n = fsub <8 x double> <double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0, double -0.0>, %m
  ret <8 x double> %n
}

; Attribute group referenced as #0 by the fneg and constant-folding tests above.
attributes #0 = { "unsafe-fp-math"="true" }