; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64le < %s | \
; RUN: FileCheck %s --check-prefix=PWR10LE
; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \
; RUN: -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64 < %s | \
; RUN: FileCheck %s --check-prefix=PWR10BE

; Checks the llc output for calls to the llvm.vector.reduce.fmul intrinsic.
; Four configurations are exercised, one per llc invocation above:
; -mcpu=pwr9 and -mcpu=pwr10, each with the powerpc64le and powerpc64 triples.
; Every vector width below is tested in three variants:
;   <name>       start value is the constant 1.0, call has no fast-math flags
;   <name>_b     start value is the runtime argument %b, no fast-math flags
;   <name>_fast  start value is the constant 1.0, call carries the `fast` flag
; The assertion lines are generated; regenerate them with the script named in
; the NOTE line instead of editing them by hand.

;;
;; Vectors of f32
;;

; <2 x float>, constant 1.0 start value, no fast-math flags. The expected code
; converts each lane to a scalar and combines them with a single xsmulsp; no
; multiply by the 1.0 start value appears in the expected output.
define dso_local float @v2f32(<2 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fmul.v2f32(float 1.000000e+00, <2 x float> %a)
  ret float %0
}

; <2 x float> with the runtime start value %b, no fast-math flags. The expected
; code uses two scalar xsmulsp instructions, the first of which takes the
; incoming f1 as an operand (presumably %b -- confirm against the ABI).
define dso_local float @v2f32_b(<2 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsmulsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsmulsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsmulsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsmulsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fmul.v2f32(float %b, <2 x float> %a)
  ret float %0
}

; <2 x float> with the `fast` flag on the call. The expected code splats one
; lane with xxspltw and does a single vector multiply (xvmulsp) before
; converting the result lane to a scalar.
define dso_local float @v2f32_fast(<2 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxspltw vs0, v2, 2
; PWR9LE-NEXT: xvmulsp vs0, v2, vs0
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxspltw vs0, v2, 1
; PWR9BE-NEXT: xvmulsp vs0, v2, vs0
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxspltw vs0, v2, 2
; PWR10LE-NEXT: xvmulsp vs0, v2, vs0
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxspltw vs0, v2, 1
; PWR10BE-NEXT: xvmulsp vs0, v2, vs0
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
  %0 = call fast float @llvm.vector.reduce.fmul.v2f32(float 1.000000e+00, <2 x float> %a)
  ret float %0
}

; <4 x float>, constant 1.0 start value, no fast-math flags. The expected code
; is a chain of three scalar xsmulsp instructions over the four lanes of v2.
define dso_local float @v4f32(<4 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> %a)
  ret float %0
}

; <4 x float> with the runtime start value %b, no fast-math flags. The expected
; code is a chain of four scalar xsmulsp instructions; the first one takes the
; incoming f1 as an operand.
define dso_local float @v4f32_b(<4 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsmulsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsmulsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsmulsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsmulsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fmul.v4f32(float %b, <4 x float> %a)
  ret float %0
}

; <4 x float> with the `fast` flag on the call. The expected code performs two
; vector multiplies (xvmulsp): one against the doubleword-swapped input and one
; against a word splat of that product.
define dso_local float @v4f32_fast(<4 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v4f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd v3, v2
; PWR9LE-NEXT: xvmulsp vs0, v2, v3
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
; PWR9LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v4f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd v3, v2
; PWR9BE-NEXT: xvmulsp vs0, v2, v3
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
; PWR9BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v4f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd v3, v2
; PWR10LE-NEXT: xvmulsp vs0, v2, v3
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
; PWR10LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v4f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd v3, v2
; PWR10BE-NEXT: xvmulsp vs0, v2, v3
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
; PWR10BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
  %0 = call fast float @llvm.vector.reduce.fmul.v4f32(float 1.000000e+00, <4 x float> %a)
  ret float %0
}

; <8 x float>, constant 1.0 start value, no fast-math flags. The expected code
; reads the input from two vector registers (v2, then v3) and chains seven
; scalar xsmulsp instructions over their lanes.
define dso_local float @v8f32(<8 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> %a)
  ret float %0
}

; <8 x float> with the runtime start value %b, no fast-math flags. The expected
; code chains eight scalar xsmulsp instructions over the lanes of v2 and v3,
; starting with a multiply that takes the incoming f1 as an operand.
define dso_local float @v8f32_b(<8 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsmulsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsmulsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsmulsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsmulsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fmul.v8f32(float %b, <8 x float> %a)
  ret float %0
}

; <8 x float> with the `fast` flag on the call. The expected code first
; multiplies the two input registers together (xvmulsp v2, v3) and then reduces
; the resulting vector with two further vector multiplies.
define dso_local float @v8f32_fast(<8 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v8f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvmulsp vs0, v2, v3
; PWR9LE-NEXT: xxswapd v2, vs0
; PWR9LE-NEXT: xvmulsp vs0, vs0, v2
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
; PWR9LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v8f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvmulsp vs0, v2, v3
; PWR9BE-NEXT: xxswapd v2, vs0
; PWR9BE-NEXT: xvmulsp vs0, vs0, v2
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
; PWR9BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v8f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvmulsp vs0, v2, v3
; PWR10LE-NEXT: xxswapd v2, vs0
; PWR10LE-NEXT: xvmulsp vs0, vs0, v2
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
; PWR10LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v8f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvmulsp vs0, v2, v3
; PWR10BE-NEXT: xxswapd v2, vs0
; PWR10BE-NEXT: xvmulsp vs0, vs0, v2
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
; PWR10BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
  %0 = call fast float @llvm.vector.reduce.fmul.v8f32(float 1.000000e+00, <8 x float> %a)
  ret float %0
}

; <16 x float>, constant 1.0 start value, no fast-math flags. The expected code
; reads the input from four vector registers (v2 through v5) and chains fifteen
; scalar xsmulsp instructions over their lanes.
define dso_local float @v16f32(<16 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v4
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v5
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f32:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v4
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v5
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f32:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v4
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v5
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f32:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v4
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v5
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %a)
  ret float %0
}

; <16 x float> with the runtime start value %b, no fast-math flags. The expected
; code chains sixteen scalar xsmulsp instructions over the lanes of v2 through
; v5, starting with a multiply that takes the incoming f1 as an operand.
define dso_local float @v16f32_b(<16 x float> %a, float %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR9LE-NEXT: xscvspdpn f0, vs0
; PWR9LE-NEXT: xsmulsp f0, f1, f0
; PWR9LE-NEXT: xxswapd vs1, v2
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v2
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v3
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v4
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v4
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxswapd vs1, v5
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR9LE-NEXT: xscvspdpn f1, vs1
; PWR9LE-NEXT: xsmulsp f0, f0, f1
; PWR9LE-NEXT: xscvspdpn f1, v5
; PWR9LE-NEXT: xsmulsp f1, f0, f1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f32_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xscvspdpn f0, v2
; PWR9BE-NEXT: xsmulsp f0, f1, f0
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v3
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v4
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v4
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xscvspdpn f1, v5
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxswapd vs1, v5
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f0, f0, f1
; PWR9BE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR9BE-NEXT: xscvspdpn f1, vs1
; PWR9BE-NEXT: xsmulsp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f32_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxsldwi vs0, v2, v2, 3
; PWR10LE-NEXT: xscvspdpn f0, vs0
; PWR10LE-NEXT: xsmulsp f0, f1, f0
; PWR10LE-NEXT: xxswapd vs1, v2
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v2
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v3
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v4
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v4
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxswapd vs1, v5
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR10LE-NEXT: xscvspdpn f1, vs1
; PWR10LE-NEXT: xsmulsp f0, f0, f1
; PWR10LE-NEXT: xscvspdpn f1, v5
; PWR10LE-NEXT: xsmulsp f1, f0, f1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f32_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xscvspdpn f0, v2
; PWR10BE-NEXT: xsmulsp f0, f1, f0
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v3
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v3, v3, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v4
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v4
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v4, v4, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xscvspdpn f1, v5
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 1
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxswapd vs1, v5
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f0, f0, f1
; PWR10BE-NEXT: xxsldwi vs1, v5, v5, 3
; PWR10BE-NEXT: xscvspdpn f1, vs1
; PWR10BE-NEXT: xsmulsp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call float @llvm.vector.reduce.fmul.v16f32(float %b, <16 x float> %a)
  ret float %0
}

; <16 x float> with the `fast` flag on the call. The expected code first
; combines the four input registers pairwise with vector multiplies (v3*v5,
; v2*v4, then their product) and then reduces the resulting vector with two
; further vector multiplies.
define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v16f32_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xvmulsp vs0, v3, v5
; PWR9LE-NEXT: xvmulsp vs1, v2, v4
; PWR9LE-NEXT: xvmulsp vs0, vs1, vs0
; PWR9LE-NEXT: xxswapd v2, vs0
; PWR9LE-NEXT: xvmulsp vs0, vs0, v2
; PWR9LE-NEXT: xxspltw vs1, vs0, 2
; PWR9LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR9LE-NEXT: xscvspdpn f1, vs0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f32_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvmulsp vs0, v3, v5
; PWR9BE-NEXT: xvmulsp vs1, v2, v4
; PWR9BE-NEXT: xvmulsp vs0, vs1, vs0
; PWR9BE-NEXT: xxswapd v2, vs0
; PWR9BE-NEXT: xvmulsp vs0, vs0, v2
; PWR9BE-NEXT: xxspltw vs1, vs0, 1
; PWR9BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR9BE-NEXT: xscvspdpn f1, vs0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f32_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvmulsp vs0, v3, v5
; PWR10LE-NEXT: xvmulsp vs1, v2, v4
; PWR10LE-NEXT: xvmulsp vs0, vs1, vs0
; PWR10LE-NEXT: xxswapd v2, vs0
; PWR10LE-NEXT: xvmulsp vs0, vs0, v2
; PWR10LE-NEXT: xxspltw vs1, vs0, 2
; PWR10LE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3
; PWR10LE-NEXT: xscvspdpn f1, vs0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f32_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvmulsp vs0, v3, v5
; PWR10BE-NEXT: xvmulsp vs1, v2, v4
; PWR10BE-NEXT: xvmulsp vs0, vs1, vs0
; PWR10BE-NEXT: xxswapd v2, vs0
; PWR10BE-NEXT: xvmulsp vs0, vs0, v2
; PWR10BE-NEXT: xxspltw vs1, vs0, 1
; PWR10BE-NEXT: xvmulsp vs0, vs0, vs1
; PWR10BE-NEXT: xscvspdpn f1, vs0
; PWR10BE-NEXT: blr
entry:
  %0 = call fast float @llvm.vector.reduce.fmul.v16f32(float 1.000000e+00, <16 x float> %a)
  ret float %0
}

; Declarations of the f32 reduction intrinsics called by the functions above.
declare float @llvm.vector.reduce.fmul.v2f32(float, <2 x float>) #0
declare float @llvm.vector.reduce.fmul.v4f32(float, <4 x float>) #0
declare float @llvm.vector.reduce.fmul.v8f32(float, <8 x float>) #0
declare float @llvm.vector.reduce.fmul.v16f32(float, <16 x float>) #0

;;
;; Vectors of f64
;;

; <2 x double>, constant 1.0 start value, no fast-math flags. The expected code
; swaps the doublewords of v2 and combines the two lanes with a single scalar
; xsmuldp; the operand order of that multiply differs between LE and BE.
define dso_local double @v2f64(<2 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsmuldp f1, f0, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xsmuldp f1, v2, f0
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsmuldp f1, f0, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xxswapd vs0, v2
; PWR10BE-NEXT: xsmuldp f1, v2, f0
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v2f64(double 1.000000e+00, <2 x double> %a)
  ret double %0
}

; <2 x double> with the runtime start value %b, no fast-math flags. The expected
; code uses two scalar xsmuldp instructions, the first of which takes the
; incoming f1 as an operand.
define dso_local double @v2f64_b(<2 x double> %a, double %b) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64_b:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xsmuldp f0, f1, f0
; PWR9LE-NEXT: xsmuldp f1, f0, v2
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64_b:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xsmuldp f0, f1, v2
; PWR9BE-NEXT: xxswapd vs1, v2
; PWR9BE-NEXT: xsmuldp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v2f64_b:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xxswapd vs0, v2
; PWR10LE-NEXT: xsmuldp f0, f1, f0
; PWR10LE-NEXT: xsmuldp f1, f0, v2
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v2f64_b:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xsmuldp f0, f1, v2
; PWR10BE-NEXT: xxswapd vs1, v2
; PWR10BE-NEXT: xsmuldp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %0 = call double @llvm.vector.reduce.fmul.v2f64(double %b, <2 x double> %a)
  ret double %0
}

define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v2f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: xxswapd vs0, v2
; PWR9LE-NEXT: xvmuldp vs0, v2, vs0
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v2f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xxswapd vs0, v2
; PWR9BE-NEXT: xvmuldp vs1, v2, vs0
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
1092; PWR9BE-NEXT: blr 1093; 1094; PWR10LE-LABEL: v2f64_fast: 1095; PWR10LE: # %bb.0: # %entry 1096; PWR10LE-NEXT: xxswapd vs0, v2 1097; PWR10LE-NEXT: xvmuldp vs0, v2, vs0 1098; PWR10LE-NEXT: xxswapd vs1, vs0 1099; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1100; PWR10LE-NEXT: blr 1101; 1102; PWR10BE-LABEL: v2f64_fast: 1103; PWR10BE: # %bb.0: # %entry 1104; PWR10BE-NEXT: xxswapd vs0, v2 1105; PWR10BE-NEXT: xvmuldp vs1, v2, vs0 1106; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1107; PWR10BE-NEXT: blr 1108entry: 1109 %0 = call fast double @llvm.vector.reduce.fmul.v2f64(double 1.000000e+00, <2 x double> %a) 1110 ret double %0 1111} 1112 1113define dso_local double @v4f64(<4 x double> %a) local_unnamed_addr #0 { 1114; PWR9LE-LABEL: v4f64: 1115; PWR9LE: # %bb.0: # %entry 1116; PWR9LE-NEXT: xxswapd vs0, v2 1117; PWR9LE-NEXT: xxswapd vs1, v3 1118; PWR9LE-NEXT: xsmuldp f0, f0, v2 1119; PWR9LE-NEXT: xsmuldp f0, f0, f1 1120; PWR9LE-NEXT: xsmuldp f1, f0, v3 1121; PWR9LE-NEXT: blr 1122; 1123; PWR9BE-LABEL: v4f64: 1124; PWR9BE: # %bb.0: # %entry 1125; PWR9BE-NEXT: xxswapd vs0, v2 1126; PWR9BE-NEXT: xxswapd vs1, v3 1127; PWR9BE-NEXT: xsmuldp f0, v2, f0 1128; PWR9BE-NEXT: xsmuldp f0, f0, v3 1129; PWR9BE-NEXT: xsmuldp f1, f0, f1 1130; PWR9BE-NEXT: blr 1131; 1132; PWR10LE-LABEL: v4f64: 1133; PWR10LE: # %bb.0: # %entry 1134; PWR10LE-NEXT: xxswapd vs0, v2 1135; PWR10LE-NEXT: xxswapd vs1, v3 1136; PWR10LE-NEXT: xsmuldp f0, f0, v2 1137; PWR10LE-NEXT: xsmuldp f0, f0, f1 1138; PWR10LE-NEXT: xsmuldp f1, f0, v3 1139; PWR10LE-NEXT: blr 1140; 1141; PWR10BE-LABEL: v4f64: 1142; PWR10BE: # %bb.0: # %entry 1143; PWR10BE-NEXT: xxswapd vs0, v2 1144; PWR10BE-NEXT: xxswapd vs1, v3 1145; PWR10BE-NEXT: xsmuldp f0, v2, f0 1146; PWR10BE-NEXT: xsmuldp f0, f0, v3 1147; PWR10BE-NEXT: xsmuldp f1, f0, f1 1148; PWR10BE-NEXT: blr 1149entry: 1150 %0 = call double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> %a) 1151 ret double %0 1152} 1153 1154define dso_local double 
@v4f64_b(<4 x double> %a, double %b) local_unnamed_addr #0 { 1155; PWR9LE-LABEL: v4f64_b: 1156; PWR9LE: # %bb.0: # %entry 1157; PWR9LE-NEXT: xxswapd vs0, v2 1158; PWR9LE-NEXT: xsmuldp f0, f1, f0 1159; PWR9LE-NEXT: xxswapd vs1, v3 1160; PWR9LE-NEXT: xsmuldp f0, f0, v2 1161; PWR9LE-NEXT: xsmuldp f0, f0, f1 1162; PWR9LE-NEXT: xsmuldp f1, f0, v3 1163; PWR9LE-NEXT: blr 1164; 1165; PWR9BE-LABEL: v4f64_b: 1166; PWR9BE: # %bb.0: # %entry 1167; PWR9BE-NEXT: xsmuldp f0, f1, v2 1168; PWR9BE-NEXT: xxswapd vs1, v2 1169; PWR9BE-NEXT: xsmuldp f0, f0, f1 1170; PWR9BE-NEXT: xxswapd vs1, v3 1171; PWR9BE-NEXT: xsmuldp f0, f0, v3 1172; PWR9BE-NEXT: xsmuldp f1, f0, f1 1173; PWR9BE-NEXT: blr 1174; 1175; PWR10LE-LABEL: v4f64_b: 1176; PWR10LE: # %bb.0: # %entry 1177; PWR10LE-NEXT: xxswapd vs0, v2 1178; PWR10LE-NEXT: xsmuldp f0, f1, f0 1179; PWR10LE-NEXT: xxswapd vs1, v3 1180; PWR10LE-NEXT: xsmuldp f0, f0, v2 1181; PWR10LE-NEXT: xsmuldp f0, f0, f1 1182; PWR10LE-NEXT: xsmuldp f1, f0, v3 1183; PWR10LE-NEXT: blr 1184; 1185; PWR10BE-LABEL: v4f64_b: 1186; PWR10BE: # %bb.0: # %entry 1187; PWR10BE-NEXT: xsmuldp f0, f1, v2 1188; PWR10BE-NEXT: xxswapd vs1, v2 1189; PWR10BE-NEXT: xsmuldp f0, f0, f1 1190; PWR10BE-NEXT: xxswapd vs1, v3 1191; PWR10BE-NEXT: xsmuldp f0, f0, v3 1192; PWR10BE-NEXT: xsmuldp f1, f0, f1 1193; PWR10BE-NEXT: blr 1194entry: 1195 %0 = call double @llvm.vector.reduce.fmul.v4f64(double %b, <4 x double> %a) 1196 ret double %0 1197} 1198 1199define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 { 1200; PWR9LE-LABEL: v4f64_fast: 1201; PWR9LE: # %bb.0: # %entry 1202; PWR9LE-NEXT: xvmuldp vs0, v2, v3 1203; PWR9LE-NEXT: xxswapd vs1, vs0 1204; PWR9LE-NEXT: xvmuldp vs0, vs0, vs1 1205; PWR9LE-NEXT: xxswapd vs1, vs0 1206; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1207; PWR9LE-NEXT: blr 1208; 1209; PWR9BE-LABEL: v4f64_fast: 1210; PWR9BE: # %bb.0: # %entry 1211; PWR9BE-NEXT: xvmuldp vs0, v2, v3 1212; PWR9BE-NEXT: xxswapd vs1, vs0 1213; PWR9BE-NEXT: xvmuldp vs1, 
vs0, vs1 1214; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1215; PWR9BE-NEXT: blr 1216; 1217; PWR10LE-LABEL: v4f64_fast: 1218; PWR10LE: # %bb.0: # %entry 1219; PWR10LE-NEXT: xvmuldp vs0, v2, v3 1220; PWR10LE-NEXT: xxswapd vs1, vs0 1221; PWR10LE-NEXT: xvmuldp vs0, vs0, vs1 1222; PWR10LE-NEXT: xxswapd vs1, vs0 1223; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1224; PWR10LE-NEXT: blr 1225; 1226; PWR10BE-LABEL: v4f64_fast: 1227; PWR10BE: # %bb.0: # %entry 1228; PWR10BE-NEXT: xvmuldp vs0, v2, v3 1229; PWR10BE-NEXT: xxswapd vs1, vs0 1230; PWR10BE-NEXT: xvmuldp vs1, vs0, vs1 1231; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1232; PWR10BE-NEXT: blr 1233entry: 1234 %0 = call fast double @llvm.vector.reduce.fmul.v4f64(double 1.000000e+00, <4 x double> %a) 1235 ret double %0 1236} 1237 1238define dso_local double @v8f64(<8 x double> %a) local_unnamed_addr #0 { 1239; PWR9LE-LABEL: v8f64: 1240; PWR9LE: # %bb.0: # %entry 1241; PWR9LE-NEXT: xxswapd vs0, v2 1242; PWR9LE-NEXT: xxswapd vs1, v3 1243; PWR9LE-NEXT: xsmuldp f0, f0, v2 1244; PWR9LE-NEXT: xsmuldp f0, f0, f1 1245; PWR9LE-NEXT: xxswapd vs1, v4 1246; PWR9LE-NEXT: xsmuldp f0, f0, v3 1247; PWR9LE-NEXT: xsmuldp f0, f0, f1 1248; PWR9LE-NEXT: xxswapd vs1, v5 1249; PWR9LE-NEXT: xsmuldp f0, f0, v4 1250; PWR9LE-NEXT: xsmuldp f0, f0, f1 1251; PWR9LE-NEXT: xsmuldp f1, f0, v5 1252; PWR9LE-NEXT: blr 1253; 1254; PWR9BE-LABEL: v8f64: 1255; PWR9BE: # %bb.0: # %entry 1256; PWR9BE-NEXT: xxswapd vs0, v2 1257; PWR9BE-NEXT: xxswapd vs1, v3 1258; PWR9BE-NEXT: xsmuldp f0, v2, f0 1259; PWR9BE-NEXT: xsmuldp f0, f0, v3 1260; PWR9BE-NEXT: xsmuldp f0, f0, f1 1261; PWR9BE-NEXT: xxswapd vs1, v4 1262; PWR9BE-NEXT: xsmuldp f0, f0, v4 1263; PWR9BE-NEXT: xsmuldp f0, f0, f1 1264; PWR9BE-NEXT: xxswapd vs1, v5 1265; PWR9BE-NEXT: xsmuldp f0, f0, v5 1266; PWR9BE-NEXT: xsmuldp f1, f0, f1 1267; PWR9BE-NEXT: blr 1268; 1269; PWR10LE-LABEL: v8f64: 1270; PWR10LE: # %bb.0: # %entry 1271; PWR10LE-NEXT: xxswapd vs0, v2 1272; PWR10LE-NEXT: 
xxswapd vs1, v3 1273; PWR10LE-NEXT: xsmuldp f0, f0, v2 1274; PWR10LE-NEXT: xsmuldp f0, f0, f1 1275; PWR10LE-NEXT: xxswapd vs1, v4 1276; PWR10LE-NEXT: xsmuldp f0, f0, v3 1277; PWR10LE-NEXT: xsmuldp f0, f0, f1 1278; PWR10LE-NEXT: xxswapd vs1, v5 1279; PWR10LE-NEXT: xsmuldp f0, f0, v4 1280; PWR10LE-NEXT: xsmuldp f0, f0, f1 1281; PWR10LE-NEXT: xsmuldp f1, f0, v5 1282; PWR10LE-NEXT: blr 1283; 1284; PWR10BE-LABEL: v8f64: 1285; PWR10BE: # %bb.0: # %entry 1286; PWR10BE-NEXT: xxswapd vs0, v2 1287; PWR10BE-NEXT: xxswapd vs1, v3 1288; PWR10BE-NEXT: xsmuldp f0, v2, f0 1289; PWR10BE-NEXT: xsmuldp f0, f0, v3 1290; PWR10BE-NEXT: xsmuldp f0, f0, f1 1291; PWR10BE-NEXT: xxswapd vs1, v4 1292; PWR10BE-NEXT: xsmuldp f0, f0, v4 1293; PWR10BE-NEXT: xsmuldp f0, f0, f1 1294; PWR10BE-NEXT: xxswapd vs1, v5 1295; PWR10BE-NEXT: xsmuldp f0, f0, v5 1296; PWR10BE-NEXT: xsmuldp f1, f0, f1 1297; PWR10BE-NEXT: blr 1298entry: 1299 %0 = call double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %a) 1300 ret double %0 1301} 1302 1303define dso_local double @v8f64_b(<8 x double> %a, double %b) local_unnamed_addr #0 { 1304; PWR9LE-LABEL: v8f64_b: 1305; PWR9LE: # %bb.0: # %entry 1306; PWR9LE-NEXT: xxswapd vs0, v2 1307; PWR9LE-NEXT: xsmuldp f0, f1, f0 1308; PWR9LE-NEXT: xxswapd vs1, v3 1309; PWR9LE-NEXT: xsmuldp f0, f0, v2 1310; PWR9LE-NEXT: xsmuldp f0, f0, f1 1311; PWR9LE-NEXT: xxswapd vs1, v4 1312; PWR9LE-NEXT: xsmuldp f0, f0, v3 1313; PWR9LE-NEXT: xsmuldp f0, f0, f1 1314; PWR9LE-NEXT: xxswapd vs1, v5 1315; PWR9LE-NEXT: xsmuldp f0, f0, v4 1316; PWR9LE-NEXT: xsmuldp f0, f0, f1 1317; PWR9LE-NEXT: xsmuldp f1, f0, v5 1318; PWR9LE-NEXT: blr 1319; 1320; PWR9BE-LABEL: v8f64_b: 1321; PWR9BE: # %bb.0: # %entry 1322; PWR9BE-NEXT: xsmuldp f0, f1, v2 1323; PWR9BE-NEXT: xxswapd vs1, v2 1324; PWR9BE-NEXT: xsmuldp f0, f0, f1 1325; PWR9BE-NEXT: xxswapd vs1, v3 1326; PWR9BE-NEXT: xsmuldp f0, f0, v3 1327; PWR9BE-NEXT: xsmuldp f0, f0, f1 1328; PWR9BE-NEXT: xxswapd vs1, v4 1329; PWR9BE-NEXT: xsmuldp f0, 
f0, v4 1330; PWR9BE-NEXT: xsmuldp f0, f0, f1 1331; PWR9BE-NEXT: xxswapd vs1, v5 1332; PWR9BE-NEXT: xsmuldp f0, f0, v5 1333; PWR9BE-NEXT: xsmuldp f1, f0, f1 1334; PWR9BE-NEXT: blr 1335; 1336; PWR10LE-LABEL: v8f64_b: 1337; PWR10LE: # %bb.0: # %entry 1338; PWR10LE-NEXT: xxswapd vs0, v2 1339; PWR10LE-NEXT: xsmuldp f0, f1, f0 1340; PWR10LE-NEXT: xxswapd vs1, v3 1341; PWR10LE-NEXT: xsmuldp f0, f0, v2 1342; PWR10LE-NEXT: xsmuldp f0, f0, f1 1343; PWR10LE-NEXT: xxswapd vs1, v4 1344; PWR10LE-NEXT: xsmuldp f0, f0, v3 1345; PWR10LE-NEXT: xsmuldp f0, f0, f1 1346; PWR10LE-NEXT: xxswapd vs1, v5 1347; PWR10LE-NEXT: xsmuldp f0, f0, v4 1348; PWR10LE-NEXT: xsmuldp f0, f0, f1 1349; PWR10LE-NEXT: xsmuldp f1, f0, v5 1350; PWR10LE-NEXT: blr 1351; 1352; PWR10BE-LABEL: v8f64_b: 1353; PWR10BE: # %bb.0: # %entry 1354; PWR10BE-NEXT: xsmuldp f0, f1, v2 1355; PWR10BE-NEXT: xxswapd vs1, v2 1356; PWR10BE-NEXT: xsmuldp f0, f0, f1 1357; PWR10BE-NEXT: xxswapd vs1, v3 1358; PWR10BE-NEXT: xsmuldp f0, f0, v3 1359; PWR10BE-NEXT: xsmuldp f0, f0, f1 1360; PWR10BE-NEXT: xxswapd vs1, v4 1361; PWR10BE-NEXT: xsmuldp f0, f0, v4 1362; PWR10BE-NEXT: xsmuldp f0, f0, f1 1363; PWR10BE-NEXT: xxswapd vs1, v5 1364; PWR10BE-NEXT: xsmuldp f0, f0, v5 1365; PWR10BE-NEXT: xsmuldp f1, f0, f1 1366; PWR10BE-NEXT: blr 1367entry: 1368 %0 = call double @llvm.vector.reduce.fmul.v8f64(double %b, <8 x double> %a) 1369 ret double %0 1370} 1371 1372define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 { 1373; PWR9LE-LABEL: v8f64_fast: 1374; PWR9LE: # %bb.0: # %entry 1375; PWR9LE-NEXT: xvmuldp vs0, v3, v5 1376; PWR9LE-NEXT: xvmuldp vs1, v2, v4 1377; PWR9LE-NEXT: xvmuldp vs0, vs1, vs0 1378; PWR9LE-NEXT: xxswapd vs1, vs0 1379; PWR9LE-NEXT: xvmuldp vs0, vs0, vs1 1380; PWR9LE-NEXT: xxswapd vs1, vs0 1381; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1382; PWR9LE-NEXT: blr 1383; 1384; PWR9BE-LABEL: v8f64_fast: 1385; PWR9BE: # %bb.0: # %entry 1386; PWR9BE-NEXT: xvmuldp vs0, v3, v5 1387; PWR9BE-NEXT: xvmuldp vs1, 
v2, v4 1388; PWR9BE-NEXT: xvmuldp vs0, vs1, vs0 1389; PWR9BE-NEXT: xxswapd vs1, vs0 1390; PWR9BE-NEXT: xvmuldp vs1, vs0, vs1 1391; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1392; PWR9BE-NEXT: blr 1393; 1394; PWR10LE-LABEL: v8f64_fast: 1395; PWR10LE: # %bb.0: # %entry 1396; PWR10LE-NEXT: xvmuldp vs0, v3, v5 1397; PWR10LE-NEXT: xvmuldp vs1, v2, v4 1398; PWR10LE-NEXT: xvmuldp vs0, vs1, vs0 1399; PWR10LE-NEXT: xxswapd vs1, vs0 1400; PWR10LE-NEXT: xvmuldp vs0, vs0, vs1 1401; PWR10LE-NEXT: xxswapd vs1, vs0 1402; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1403; PWR10LE-NEXT: blr 1404; 1405; PWR10BE-LABEL: v8f64_fast: 1406; PWR10BE: # %bb.0: # %entry 1407; PWR10BE-NEXT: xvmuldp vs0, v3, v5 1408; PWR10BE-NEXT: xvmuldp vs1, v2, v4 1409; PWR10BE-NEXT: xvmuldp vs0, vs1, vs0 1410; PWR10BE-NEXT: xxswapd vs1, vs0 1411; PWR10BE-NEXT: xvmuldp vs1, vs0, vs1 1412; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1413; PWR10BE-NEXT: blr 1414entry: 1415 %0 = call fast double @llvm.vector.reduce.fmul.v8f64(double 1.000000e+00, <8 x double> %a) 1416 ret double %0 1417} 1418 1419define dso_local double @v16f64(<16 x double> %a) local_unnamed_addr #0 { 1420; PWR9LE-LABEL: v16f64: 1421; PWR9LE: # %bb.0: # %entry 1422; PWR9LE-NEXT: xxswapd vs0, v2 1423; PWR9LE-NEXT: xxswapd vs1, v3 1424; PWR9LE-NEXT: xsmuldp f0, f0, v2 1425; PWR9LE-NEXT: xsmuldp f0, f0, f1 1426; PWR9LE-NEXT: xxswapd vs1, v4 1427; PWR9LE-NEXT: xsmuldp f0, f0, v3 1428; PWR9LE-NEXT: xsmuldp f0, f0, f1 1429; PWR9LE-NEXT: xxswapd vs1, v5 1430; PWR9LE-NEXT: xsmuldp f0, f0, v4 1431; PWR9LE-NEXT: xsmuldp f0, f0, f1 1432; PWR9LE-NEXT: xxswapd vs1, v6 1433; PWR9LE-NEXT: xsmuldp f0, f0, v5 1434; PWR9LE-NEXT: xsmuldp f0, f0, f1 1435; PWR9LE-NEXT: xxswapd vs1, v7 1436; PWR9LE-NEXT: xsmuldp f0, f0, v6 1437; PWR9LE-NEXT: xsmuldp f0, f0, f1 1438; PWR9LE-NEXT: xxswapd vs1, v8 1439; PWR9LE-NEXT: xsmuldp f0, f0, v7 1440; PWR9LE-NEXT: xsmuldp f0, f0, f1 1441; PWR9LE-NEXT: xxswapd vs1, v9 1442; PWR9LE-NEXT: xsmuldp f0, 
f0, v8 1443; PWR9LE-NEXT: xsmuldp f0, f0, f1 1444; PWR9LE-NEXT: xsmuldp f1, f0, v9 1445; PWR9LE-NEXT: blr 1446; 1447; PWR9BE-LABEL: v16f64: 1448; PWR9BE: # %bb.0: # %entry 1449; PWR9BE-NEXT: xxswapd vs0, v2 1450; PWR9BE-NEXT: xxswapd vs1, v3 1451; PWR9BE-NEXT: xsmuldp f0, v2, f0 1452; PWR9BE-NEXT: xsmuldp f0, f0, v3 1453; PWR9BE-NEXT: xsmuldp f0, f0, f1 1454; PWR9BE-NEXT: xxswapd vs1, v4 1455; PWR9BE-NEXT: xsmuldp f0, f0, v4 1456; PWR9BE-NEXT: xsmuldp f0, f0, f1 1457; PWR9BE-NEXT: xxswapd vs1, v5 1458; PWR9BE-NEXT: xsmuldp f0, f0, v5 1459; PWR9BE-NEXT: xsmuldp f0, f0, f1 1460; PWR9BE-NEXT: xxswapd vs1, v6 1461; PWR9BE-NEXT: xsmuldp f0, f0, v6 1462; PWR9BE-NEXT: xsmuldp f0, f0, f1 1463; PWR9BE-NEXT: xxswapd vs1, v7 1464; PWR9BE-NEXT: xsmuldp f0, f0, v7 1465; PWR9BE-NEXT: xsmuldp f0, f0, f1 1466; PWR9BE-NEXT: xxswapd vs1, v8 1467; PWR9BE-NEXT: xsmuldp f0, f0, v8 1468; PWR9BE-NEXT: xsmuldp f0, f0, f1 1469; PWR9BE-NEXT: xxswapd vs1, v9 1470; PWR9BE-NEXT: xsmuldp f0, f0, v9 1471; PWR9BE-NEXT: xsmuldp f1, f0, f1 1472; PWR9BE-NEXT: blr 1473; 1474; PWR10LE-LABEL: v16f64: 1475; PWR10LE: # %bb.0: # %entry 1476; PWR10LE-NEXT: xxswapd vs0, v2 1477; PWR10LE-NEXT: xxswapd vs1, v3 1478; PWR10LE-NEXT: xsmuldp f0, f0, v2 1479; PWR10LE-NEXT: xsmuldp f0, f0, f1 1480; PWR10LE-NEXT: xxswapd vs1, v4 1481; PWR10LE-NEXT: xsmuldp f0, f0, v3 1482; PWR10LE-NEXT: xsmuldp f0, f0, f1 1483; PWR10LE-NEXT: xxswapd vs1, v5 1484; PWR10LE-NEXT: xsmuldp f0, f0, v4 1485; PWR10LE-NEXT: xsmuldp f0, f0, f1 1486; PWR10LE-NEXT: xxswapd vs1, v6 1487; PWR10LE-NEXT: xsmuldp f0, f0, v5 1488; PWR10LE-NEXT: xsmuldp f0, f0, f1 1489; PWR10LE-NEXT: xxswapd vs1, v7 1490; PWR10LE-NEXT: xsmuldp f0, f0, v6 1491; PWR10LE-NEXT: xsmuldp f0, f0, f1 1492; PWR10LE-NEXT: xxswapd vs1, v8 1493; PWR10LE-NEXT: xsmuldp f0, f0, v7 1494; PWR10LE-NEXT: xsmuldp f0, f0, f1 1495; PWR10LE-NEXT: xxswapd vs1, v9 1496; PWR10LE-NEXT: xsmuldp f0, f0, v8 1497; PWR10LE-NEXT: xsmuldp f0, f0, f1 1498; PWR10LE-NEXT: xsmuldp f1, f0, v9 1499; 
PWR10LE-NEXT: blr 1500; 1501; PWR10BE-LABEL: v16f64: 1502; PWR10BE: # %bb.0: # %entry 1503; PWR10BE-NEXT: xxswapd vs0, v2 1504; PWR10BE-NEXT: xxswapd vs1, v3 1505; PWR10BE-NEXT: xsmuldp f0, v2, f0 1506; PWR10BE-NEXT: xsmuldp f0, f0, v3 1507; PWR10BE-NEXT: xsmuldp f0, f0, f1 1508; PWR10BE-NEXT: xxswapd vs1, v4 1509; PWR10BE-NEXT: xsmuldp f0, f0, v4 1510; PWR10BE-NEXT: xsmuldp f0, f0, f1 1511; PWR10BE-NEXT: xxswapd vs1, v5 1512; PWR10BE-NEXT: xsmuldp f0, f0, v5 1513; PWR10BE-NEXT: xsmuldp f0, f0, f1 1514; PWR10BE-NEXT: xxswapd vs1, v6 1515; PWR10BE-NEXT: xsmuldp f0, f0, v6 1516; PWR10BE-NEXT: xsmuldp f0, f0, f1 1517; PWR10BE-NEXT: xxswapd vs1, v7 1518; PWR10BE-NEXT: xsmuldp f0, f0, v7 1519; PWR10BE-NEXT: xsmuldp f0, f0, f1 1520; PWR10BE-NEXT: xxswapd vs1, v8 1521; PWR10BE-NEXT: xsmuldp f0, f0, v8 1522; PWR10BE-NEXT: xsmuldp f0, f0, f1 1523; PWR10BE-NEXT: xxswapd vs1, v9 1524; PWR10BE-NEXT: xsmuldp f0, f0, v9 1525; PWR10BE-NEXT: xsmuldp f1, f0, f1 1526; PWR10BE-NEXT: blr 1527entry: 1528 %0 = call double @llvm.vector.reduce.fmul.v16f64(double 1.000000e+00, <16 x double> %a) 1529 ret double %0 1530} 1531 1532define dso_local double @v16f64_b(<16 x double> %a, double %b) local_unnamed_addr #0 { 1533; PWR9LE-LABEL: v16f64_b: 1534; PWR9LE: # %bb.0: # %entry 1535; PWR9LE-NEXT: xxswapd vs0, v2 1536; PWR9LE-NEXT: xsmuldp f0, f1, f0 1537; PWR9LE-NEXT: xxswapd vs1, v3 1538; PWR9LE-NEXT: xsmuldp f0, f0, v2 1539; PWR9LE-NEXT: xsmuldp f0, f0, f1 1540; PWR9LE-NEXT: xxswapd vs1, v4 1541; PWR9LE-NEXT: xsmuldp f0, f0, v3 1542; PWR9LE-NEXT: xsmuldp f0, f0, f1 1543; PWR9LE-NEXT: xxswapd vs1, v5 1544; PWR9LE-NEXT: xsmuldp f0, f0, v4 1545; PWR9LE-NEXT: xsmuldp f0, f0, f1 1546; PWR9LE-NEXT: xxswapd vs1, v6 1547; PWR9LE-NEXT: xsmuldp f0, f0, v5 1548; PWR9LE-NEXT: xsmuldp f0, f0, f1 1549; PWR9LE-NEXT: xxswapd vs1, v7 1550; PWR9LE-NEXT: xsmuldp f0, f0, v6 1551; PWR9LE-NEXT: xsmuldp f0, f0, f1 1552; PWR9LE-NEXT: xxswapd vs1, v8 1553; PWR9LE-NEXT: xsmuldp f0, f0, v7 1554; PWR9LE-NEXT: xsmuldp 
f0, f0, f1 1555; PWR9LE-NEXT: xxswapd vs1, v9 1556; PWR9LE-NEXT: xsmuldp f0, f0, v8 1557; PWR9LE-NEXT: xsmuldp f0, f0, f1 1558; PWR9LE-NEXT: xsmuldp f1, f0, v9 1559; PWR9LE-NEXT: blr 1560; 1561; PWR9BE-LABEL: v16f64_b: 1562; PWR9BE: # %bb.0: # %entry 1563; PWR9BE-NEXT: xsmuldp f0, f1, v2 1564; PWR9BE-NEXT: xxswapd vs1, v2 1565; PWR9BE-NEXT: xsmuldp f0, f0, f1 1566; PWR9BE-NEXT: xxswapd vs1, v3 1567; PWR9BE-NEXT: xsmuldp f0, f0, v3 1568; PWR9BE-NEXT: xsmuldp f0, f0, f1 1569; PWR9BE-NEXT: xxswapd vs1, v4 1570; PWR9BE-NEXT: xsmuldp f0, f0, v4 1571; PWR9BE-NEXT: xsmuldp f0, f0, f1 1572; PWR9BE-NEXT: xxswapd vs1, v5 1573; PWR9BE-NEXT: xsmuldp f0, f0, v5 1574; PWR9BE-NEXT: xsmuldp f0, f0, f1 1575; PWR9BE-NEXT: xxswapd vs1, v6 1576; PWR9BE-NEXT: xsmuldp f0, f0, v6 1577; PWR9BE-NEXT: xsmuldp f0, f0, f1 1578; PWR9BE-NEXT: xxswapd vs1, v7 1579; PWR9BE-NEXT: xsmuldp f0, f0, v7 1580; PWR9BE-NEXT: xsmuldp f0, f0, f1 1581; PWR9BE-NEXT: xxswapd vs1, v8 1582; PWR9BE-NEXT: xsmuldp f0, f0, v8 1583; PWR9BE-NEXT: xsmuldp f0, f0, f1 1584; PWR9BE-NEXT: xxswapd vs1, v9 1585; PWR9BE-NEXT: xsmuldp f0, f0, v9 1586; PWR9BE-NEXT: xsmuldp f1, f0, f1 1587; PWR9BE-NEXT: blr 1588; 1589; PWR10LE-LABEL: v16f64_b: 1590; PWR10LE: # %bb.0: # %entry 1591; PWR10LE-NEXT: xxswapd vs0, v2 1592; PWR10LE-NEXT: xsmuldp f0, f1, f0 1593; PWR10LE-NEXT: xxswapd vs1, v3 1594; PWR10LE-NEXT: xsmuldp f0, f0, v2 1595; PWR10LE-NEXT: xsmuldp f0, f0, f1 1596; PWR10LE-NEXT: xxswapd vs1, v4 1597; PWR10LE-NEXT: xsmuldp f0, f0, v3 1598; PWR10LE-NEXT: xsmuldp f0, f0, f1 1599; PWR10LE-NEXT: xxswapd vs1, v5 1600; PWR10LE-NEXT: xsmuldp f0, f0, v4 1601; PWR10LE-NEXT: xsmuldp f0, f0, f1 1602; PWR10LE-NEXT: xxswapd vs1, v6 1603; PWR10LE-NEXT: xsmuldp f0, f0, v5 1604; PWR10LE-NEXT: xsmuldp f0, f0, f1 1605; PWR10LE-NEXT: xxswapd vs1, v7 1606; PWR10LE-NEXT: xsmuldp f0, f0, v6 1607; PWR10LE-NEXT: xsmuldp f0, f0, f1 1608; PWR10LE-NEXT: xxswapd vs1, v8 1609; PWR10LE-NEXT: xsmuldp f0, f0, v7 1610; PWR10LE-NEXT: xsmuldp f0, f0, f1 1611; 
PWR10LE-NEXT: xxswapd vs1, v9 1612; PWR10LE-NEXT: xsmuldp f0, f0, v8 1613; PWR10LE-NEXT: xsmuldp f0, f0, f1 1614; PWR10LE-NEXT: xsmuldp f1, f0, v9 1615; PWR10LE-NEXT: blr 1616; 1617; PWR10BE-LABEL: v16f64_b: 1618; PWR10BE: # %bb.0: # %entry 1619; PWR10BE-NEXT: xsmuldp f0, f1, v2 1620; PWR10BE-NEXT: xxswapd vs1, v2 1621; PWR10BE-NEXT: xsmuldp f0, f0, f1 1622; PWR10BE-NEXT: xxswapd vs1, v3 1623; PWR10BE-NEXT: xsmuldp f0, f0, v3 1624; PWR10BE-NEXT: xsmuldp f0, f0, f1 1625; PWR10BE-NEXT: xxswapd vs1, v4 1626; PWR10BE-NEXT: xsmuldp f0, f0, v4 1627; PWR10BE-NEXT: xsmuldp f0, f0, f1 1628; PWR10BE-NEXT: xxswapd vs1, v5 1629; PWR10BE-NEXT: xsmuldp f0, f0, v5 1630; PWR10BE-NEXT: xsmuldp f0, f0, f1 1631; PWR10BE-NEXT: xxswapd vs1, v6 1632; PWR10BE-NEXT: xsmuldp f0, f0, v6 1633; PWR10BE-NEXT: xsmuldp f0, f0, f1 1634; PWR10BE-NEXT: xxswapd vs1, v7 1635; PWR10BE-NEXT: xsmuldp f0, f0, v7 1636; PWR10BE-NEXT: xsmuldp f0, f0, f1 1637; PWR10BE-NEXT: xxswapd vs1, v8 1638; PWR10BE-NEXT: xsmuldp f0, f0, v8 1639; PWR10BE-NEXT: xsmuldp f0, f0, f1 1640; PWR10BE-NEXT: xxswapd vs1, v9 1641; PWR10BE-NEXT: xsmuldp f0, f0, v9 1642; PWR10BE-NEXT: xsmuldp f1, f0, f1 1643; PWR10BE-NEXT: blr 1644entry: 1645 %0 = call double @llvm.vector.reduce.fmul.v16f64(double %b, <16 x double> %a) 1646 ret double %0 1647} 1648 1649define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 { 1650; PWR9LE-LABEL: v16f64_fast: 1651; PWR9LE: # %bb.0: # %entry 1652; PWR9LE-NEXT: xvmuldp vs0, v4, v8 1653; PWR9LE-NEXT: xvmuldp vs1, v2, v6 1654; PWR9LE-NEXT: xvmuldp vs2, v5, v9 1655; PWR9LE-NEXT: xvmuldp vs3, v3, v7 1656; PWR9LE-NEXT: xvmuldp vs2, vs3, vs2 1657; PWR9LE-NEXT: xvmuldp vs0, vs1, vs0 1658; PWR9LE-NEXT: xvmuldp vs0, vs0, vs2 1659; PWR9LE-NEXT: xxswapd vs1, vs0 1660; PWR9LE-NEXT: xvmuldp vs0, vs0, vs1 1661; PWR9LE-NEXT: xxswapd vs1, vs0 1662; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1663; PWR9LE-NEXT: blr 1664; 1665; PWR9BE-LABEL: v16f64_fast: 1666; PWR9BE: # %bb.0: # %entry 1667; 
PWR9BE-NEXT: xvmuldp vs0, v4, v8 1668; PWR9BE-NEXT: xvmuldp vs1, v2, v6 1669; PWR9BE-NEXT: xvmuldp vs2, v5, v9 1670; PWR9BE-NEXT: xvmuldp vs3, v3, v7 1671; PWR9BE-NEXT: xvmuldp vs2, vs3, vs2 1672; PWR9BE-NEXT: xvmuldp vs0, vs1, vs0 1673; PWR9BE-NEXT: xvmuldp vs0, vs0, vs2 1674; PWR9BE-NEXT: xxswapd vs1, vs0 1675; PWR9BE-NEXT: xvmuldp vs1, vs0, vs1 1676; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1677; PWR9BE-NEXT: blr 1678; 1679; PWR10LE-LABEL: v16f64_fast: 1680; PWR10LE: # %bb.0: # %entry 1681; PWR10LE-NEXT: xvmuldp vs0, v4, v8 1682; PWR10LE-NEXT: xvmuldp vs1, v2, v6 1683; PWR10LE-NEXT: xvmuldp vs2, v5, v9 1684; PWR10LE-NEXT: xvmuldp vs3, v3, v7 1685; PWR10LE-NEXT: xvmuldp vs2, vs3, vs2 1686; PWR10LE-NEXT: xvmuldp vs0, vs1, vs0 1687; PWR10LE-NEXT: xvmuldp vs0, vs0, vs2 1688; PWR10LE-NEXT: xxswapd vs1, vs0 1689; PWR10LE-NEXT: xvmuldp vs0, vs0, vs1 1690; PWR10LE-NEXT: xxswapd vs1, vs0 1691; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1692; PWR10LE-NEXT: blr 1693; 1694; PWR10BE-LABEL: v16f64_fast: 1695; PWR10BE: # %bb.0: # %entry 1696; PWR10BE-NEXT: xvmuldp vs0, v4, v8 1697; PWR10BE-NEXT: xvmuldp vs1, v2, v6 1698; PWR10BE-NEXT: xvmuldp vs2, v5, v9 1699; PWR10BE-NEXT: xvmuldp vs3, v3, v7 1700; PWR10BE-NEXT: xvmuldp vs2, vs3, vs2 1701; PWR10BE-NEXT: xvmuldp vs0, vs1, vs0 1702; PWR10BE-NEXT: xvmuldp vs0, vs0, vs2 1703; PWR10BE-NEXT: xxswapd vs1, vs0 1704; PWR10BE-NEXT: xvmuldp vs1, vs0, vs1 1705; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 1706; PWR10BE-NEXT: blr 1707entry: 1708 %0 = call fast double @llvm.vector.reduce.fmul.v16f64(double 1.000000e+00, <16 x double> %a) 1709 ret double %0 1710} 1711 1712declare double @llvm.vector.reduce.fmul.v2f64(double, <2 x double>) #0 1713declare double @llvm.vector.reduce.fmul.v4f64(double, <4 x double>) #0 1714declare double @llvm.vector.reduce.fmul.v8f64(double, <8 x double>) #0 1715declare double @llvm.vector.reduce.fmul.v16f64(double, <16 x double>) #0 1716 1717attributes #0 = { nounwind } 1718