1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py 2; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 3; RUN: -mcpu=pwr9 -mtriple=powerpc64le < %s | FileCheck %s --check-prefix=PWR9LE 4; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 5; RUN: -mcpu=pwr9 -mtriple=powerpc64 < %s | FileCheck %s --check-prefix=PWR9BE 6; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 7; RUN: -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64le < %s | \ 8; RUN: FileCheck %s --check-prefix=PWR10LE 9; RUN: llc -verify-machineinstrs -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr \ 10; RUN: -mcpu=pwr10 -mattr=-paired-vector-memops -mtriple=powerpc64 < %s | \ 11; RUN: FileCheck %s --check-prefix=PWR10BE 12 13;; 14;; Vectors of f32 15;; 16define dso_local float @v2f32(<2 x float> %a) local_unnamed_addr #0 { 17; PWR9LE-LABEL: v2f32: 18; PWR9LE: # %bb.0: # %entry 19; PWR9LE-NEXT: xxswapd vs0, v2 20; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 3 21; PWR9LE-NEXT: xscvspdpn f0, vs0 22; PWR9LE-NEXT: xscvspdpn f1, vs1 23; PWR9LE-NEXT: xsmaxdp f1, f1, f0 24; PWR9LE-NEXT: blr 25; 26; PWR9BE-LABEL: v2f32: 27; PWR9BE: # %bb.0: # %entry 28; PWR9BE-NEXT: xxsldwi vs1, v2, v2, 1 29; PWR9BE-NEXT: xscvspdpn f0, v2 30; PWR9BE-NEXT: xscvspdpn f1, vs1 31; PWR9BE-NEXT: xsmaxdp f1, f0, f1 32; PWR9BE-NEXT: blr 33; 34; PWR10LE-LABEL: v2f32: 35; PWR10LE: # %bb.0: # %entry 36; PWR10LE-NEXT: xxswapd vs0, v2 37; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 3 38; PWR10LE-NEXT: xscvspdpn f0, vs0 39; PWR10LE-NEXT: xscvspdpn f1, vs1 40; PWR10LE-NEXT: xsmaxdp f1, f1, f0 41; PWR10LE-NEXT: blr 42; 43; PWR10BE-LABEL: v2f32: 44; PWR10BE: # %bb.0: # %entry 45; PWR10BE-NEXT: xxsldwi vs1, v2, v2, 1 46; PWR10BE-NEXT: xscvspdpn f0, v2 47; PWR10BE-NEXT: xscvspdpn f1, vs1 48; PWR10BE-NEXT: xsmaxdp f1, f0, f1 49; PWR10BE-NEXT: blr 50entry: 51 %0 = call float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a) 52 ret float %0 53} 54 55define 
dso_local float @v2f32_fast(<2 x float> %a) local_unnamed_addr #0 { 56; PWR9LE-LABEL: v2f32_fast: 57; PWR9LE: # %bb.0: # %entry 58; PWR9LE-NEXT: xxspltw vs0, v2, 2 59; PWR9LE-NEXT: xvmaxsp vs0, v2, vs0 60; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 61; PWR9LE-NEXT: xscvspdpn f1, vs0 62; PWR9LE-NEXT: blr 63; 64; PWR9BE-LABEL: v2f32_fast: 65; PWR9BE: # %bb.0: # %entry 66; PWR9BE-NEXT: xxspltw vs0, v2, 1 67; PWR9BE-NEXT: xvmaxsp vs0, v2, vs0 68; PWR9BE-NEXT: xscvspdpn f1, vs0 69; PWR9BE-NEXT: blr 70; 71; PWR10LE-LABEL: v2f32_fast: 72; PWR10LE: # %bb.0: # %entry 73; PWR10LE-NEXT: xxspltw vs0, v2, 2 74; PWR10LE-NEXT: xvmaxsp vs0, v2, vs0 75; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3 76; PWR10LE-NEXT: xscvspdpn f1, vs0 77; PWR10LE-NEXT: blr 78; 79; PWR10BE-LABEL: v2f32_fast: 80; PWR10BE: # %bb.0: # %entry 81; PWR10BE-NEXT: xxspltw vs0, v2, 1 82; PWR10BE-NEXT: xvmaxsp vs0, v2, vs0 83; PWR10BE-NEXT: xscvspdpn f1, vs0 84; PWR10BE-NEXT: blr 85entry: 86 %0 = call fast float @llvm.vector.reduce.fmax.v2f32(<2 x float> %a) 87 ret float %0 88} 89 90define dso_local float @v4f32(<4 x float> %a) local_unnamed_addr #0 { 91; PWR9LE-LABEL: v4f32: 92; PWR9LE: # %bb.0: # %entry 93; PWR9LE-NEXT: xxsldwi vs2, v2, v2, 3 94; PWR9LE-NEXT: xxswapd vs3, v2 95; PWR9LE-NEXT: xscvspdpn f0, v2 96; PWR9LE-NEXT: xscvspdpn f2, vs2 97; PWR9LE-NEXT: xxsldwi vs1, v2, v2, 1 98; PWR9LE-NEXT: xscvspdpn f3, vs3 99; PWR9LE-NEXT: xscvspdpn f1, vs1 100; PWR9LE-NEXT: xsmaxdp f2, f2, f3 101; PWR9LE-NEXT: xsmaxdp f1, f2, f1 102; PWR9LE-NEXT: xsmaxdp f1, f1, f0 103; PWR9LE-NEXT: blr 104; 105; PWR9BE-LABEL: v4f32: 106; PWR9BE: # %bb.0: # %entry 107; PWR9BE-NEXT: xxsldwi vs2, v2, v2, 1 108; PWR9BE-NEXT: xxswapd vs1, v2 109; PWR9BE-NEXT: xscvspdpn f3, v2 110; PWR9BE-NEXT: xscvspdpn f2, vs2 111; PWR9BE-NEXT: xxsldwi vs0, v2, v2, 3 112; PWR9BE-NEXT: xscvspdpn f1, vs1 113; PWR9BE-NEXT: xscvspdpn f0, vs0 114; PWR9BE-NEXT: xsmaxdp f2, f3, f2 115; PWR9BE-NEXT: xsmaxdp f1, f2, f1 116; PWR9BE-NEXT: xsmaxdp f1, f1, f0 117; 
PWR9BE-NEXT: blr 118; 119; PWR10LE-LABEL: v4f32: 120; PWR10LE: # %bb.0: # %entry 121; PWR10LE-NEXT: xxsldwi vs2, v2, v2, 3 122; PWR10LE-NEXT: xxswapd vs3, v2 123; PWR10LE-NEXT: xxsldwi vs1, v2, v2, 1 124; PWR10LE-NEXT: xscvspdpn f0, v2 125; PWR10LE-NEXT: xscvspdpn f2, vs2 126; PWR10LE-NEXT: xscvspdpn f3, vs3 127; PWR10LE-NEXT: xscvspdpn f1, vs1 128; PWR10LE-NEXT: xsmaxdp f2, f2, f3 129; PWR10LE-NEXT: xsmaxdp f1, f2, f1 130; PWR10LE-NEXT: xsmaxdp f1, f1, f0 131; PWR10LE-NEXT: blr 132; 133; PWR10BE-LABEL: v4f32: 134; PWR10BE: # %bb.0: # %entry 135; PWR10BE-NEXT: xxsldwi vs2, v2, v2, 1 136; PWR10BE-NEXT: xxswapd vs1, v2 137; PWR10BE-NEXT: xscvspdpn f3, v2 138; PWR10BE-NEXT: xxsldwi vs0, v2, v2, 3 139; PWR10BE-NEXT: xscvspdpn f2, vs2 140; PWR10BE-NEXT: xscvspdpn f1, vs1 141; PWR10BE-NEXT: xscvspdpn f0, vs0 142; PWR10BE-NEXT: xsmaxdp f2, f3, f2 143; PWR10BE-NEXT: xsmaxdp f1, f2, f1 144; PWR10BE-NEXT: xsmaxdp f1, f1, f0 145; PWR10BE-NEXT: blr 146entry: 147 %0 = call float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a) 148 ret float %0 149} 150 151define dso_local float @v4f32_fast(<4 x float> %a) local_unnamed_addr #0 { 152; PWR9LE-LABEL: v4f32_fast: 153; PWR9LE: # %bb.0: # %entry 154; PWR9LE-NEXT: xxswapd v3, v2 155; PWR9LE-NEXT: xvmaxsp vs0, v2, v3 156; PWR9LE-NEXT: xxspltw vs1, vs0, 2 157; PWR9LE-NEXT: xvmaxsp vs0, vs0, vs1 158; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 159; PWR9LE-NEXT: xscvspdpn f1, vs0 160; PWR9LE-NEXT: blr 161; 162; PWR9BE-LABEL: v4f32_fast: 163; PWR9BE: # %bb.0: # %entry 164; PWR9BE-NEXT: xxswapd v3, v2 165; PWR9BE-NEXT: xvmaxsp vs0, v2, v3 166; PWR9BE-NEXT: xxspltw vs1, vs0, 1 167; PWR9BE-NEXT: xvmaxsp vs0, vs0, vs1 168; PWR9BE-NEXT: xscvspdpn f1, vs0 169; PWR9BE-NEXT: blr 170; 171; PWR10LE-LABEL: v4f32_fast: 172; PWR10LE: # %bb.0: # %entry 173; PWR10LE-NEXT: xxswapd v3, v2 174; PWR10LE-NEXT: xvmaxsp vs0, v2, v3 175; PWR10LE-NEXT: xxspltw vs1, vs0, 2 176; PWR10LE-NEXT: xvmaxsp vs0, vs0, vs1 177; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3 178; 
PWR10LE-NEXT: xscvspdpn f1, vs0 179; PWR10LE-NEXT: blr 180; 181; PWR10BE-LABEL: v4f32_fast: 182; PWR10BE: # %bb.0: # %entry 183; PWR10BE-NEXT: xxswapd v3, v2 184; PWR10BE-NEXT: xvmaxsp vs0, v2, v3 185; PWR10BE-NEXT: xxspltw vs1, vs0, 1 186; PWR10BE-NEXT: xvmaxsp vs0, vs0, vs1 187; PWR10BE-NEXT: xscvspdpn f1, vs0 188; PWR10BE-NEXT: blr 189entry: 190 %0 = call fast float @llvm.vector.reduce.fmax.v4f32(<4 x float> %a) 191 ret float %0 192} 193 194define dso_local float @v8f32(<8 x float> %a) local_unnamed_addr #0 { 195; PWR9LE-LABEL: v8f32: 196; PWR9LE: # %bb.0: # %entry 197; PWR9LE-NEXT: xvmaxsp vs0, v2, v3 198; PWR9LE-NEXT: xxswapd vs1, vs0 199; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 3 200; PWR9LE-NEXT: xscvspdpn f1, vs1 201; PWR9LE-NEXT: xscvspdpn f2, vs2 202; PWR9LE-NEXT: xsmaxdp f1, f2, f1 203; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 1 204; PWR9LE-NEXT: xscvspdpn f0, vs0 205; PWR9LE-NEXT: xscvspdpn f2, vs2 206; PWR9LE-NEXT: xsmaxdp f1, f1, f2 207; PWR9LE-NEXT: xsmaxdp f1, f1, f0 208; PWR9LE-NEXT: blr 209; 210; PWR9BE-LABEL: v8f32: 211; PWR9BE: # %bb.0: # %entry 212; PWR9BE-NEXT: xvmaxsp vs0, v2, v3 213; PWR9BE-NEXT: xxsldwi vs2, vs0, vs0, 1 214; PWR9BE-NEXT: xscvspdpn f1, vs0 215; PWR9BE-NEXT: xscvspdpn f2, vs2 216; PWR9BE-NEXT: xsmaxdp f1, f1, f2 217; PWR9BE-NEXT: xxswapd vs2, vs0 218; PWR9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 219; PWR9BE-NEXT: xscvspdpn f2, vs2 220; PWR9BE-NEXT: xscvspdpn f0, vs0 221; PWR9BE-NEXT: xsmaxdp f1, f1, f2 222; PWR9BE-NEXT: xsmaxdp f1, f1, f0 223; PWR9BE-NEXT: blr 224; 225; PWR10LE-LABEL: v8f32: 226; PWR10LE: # %bb.0: # %entry 227; PWR10LE-NEXT: xvmaxsp vs0, v2, v3 228; PWR10LE-NEXT: xxswapd vs1, vs0 229; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 3 230; PWR10LE-NEXT: xscvspdpn f1, vs1 231; PWR10LE-NEXT: xscvspdpn f2, vs2 232; PWR10LE-NEXT: xsmaxdp f1, f2, f1 233; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 1 234; PWR10LE-NEXT: xscvspdpn f0, vs0 235; PWR10LE-NEXT: xscvspdpn f2, vs2 236; PWR10LE-NEXT: xsmaxdp f1, f1, f2 237; PWR10LE-NEXT: xsmaxdp f1, f1, f0 
238; PWR10LE-NEXT: blr 239; 240; PWR10BE-LABEL: v8f32: 241; PWR10BE: # %bb.0: # %entry 242; PWR10BE-NEXT: xvmaxsp vs0, v2, v3 243; PWR10BE-NEXT: xxsldwi vs2, vs0, vs0, 1 244; PWR10BE-NEXT: xscvspdpn f1, vs0 245; PWR10BE-NEXT: xscvspdpn f2, vs2 246; PWR10BE-NEXT: xsmaxdp f1, f1, f2 247; PWR10BE-NEXT: xxswapd vs2, vs0 248; PWR10BE-NEXT: xxsldwi vs0, vs0, vs0, 3 249; PWR10BE-NEXT: xscvspdpn f2, vs2 250; PWR10BE-NEXT: xscvspdpn f0, vs0 251; PWR10BE-NEXT: xsmaxdp f1, f1, f2 252; PWR10BE-NEXT: xsmaxdp f1, f1, f0 253; PWR10BE-NEXT: blr 254entry: 255 %0 = call float @llvm.vector.reduce.fmax.v8f32(<8 x float> %a) 256 ret float %0 257} 258 259define dso_local float @v8f32_fast(<8 x float> %a) local_unnamed_addr #0 { 260; PWR9LE-LABEL: v8f32_fast: 261; PWR9LE: # %bb.0: # %entry 262; PWR9LE-NEXT: xvmaxsp vs0, v2, v3 263; PWR9LE-NEXT: xxswapd v2, vs0 264; PWR9LE-NEXT: xvmaxsp vs0, vs0, v2 265; PWR9LE-NEXT: xxspltw vs1, vs0, 2 266; PWR9LE-NEXT: xvmaxsp vs0, vs0, vs1 267; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 268; PWR9LE-NEXT: xscvspdpn f1, vs0 269; PWR9LE-NEXT: blr 270; 271; PWR9BE-LABEL: v8f32_fast: 272; PWR9BE: # %bb.0: # %entry 273; PWR9BE-NEXT: xvmaxsp vs0, v2, v3 274; PWR9BE-NEXT: xxswapd v2, vs0 275; PWR9BE-NEXT: xvmaxsp vs0, vs0, v2 276; PWR9BE-NEXT: xxspltw vs1, vs0, 1 277; PWR9BE-NEXT: xvmaxsp vs0, vs0, vs1 278; PWR9BE-NEXT: xscvspdpn f1, vs0 279; PWR9BE-NEXT: blr 280; 281; PWR10LE-LABEL: v8f32_fast: 282; PWR10LE: # %bb.0: # %entry 283; PWR10LE-NEXT: xvmaxsp vs0, v2, v3 284; PWR10LE-NEXT: xxswapd v2, vs0 285; PWR10LE-NEXT: xvmaxsp vs0, vs0, v2 286; PWR10LE-NEXT: xxspltw vs1, vs0, 2 287; PWR10LE-NEXT: xvmaxsp vs0, vs0, vs1 288; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3 289; PWR10LE-NEXT: xscvspdpn f1, vs0 290; PWR10LE-NEXT: blr 291; 292; PWR10BE-LABEL: v8f32_fast: 293; PWR10BE: # %bb.0: # %entry 294; PWR10BE-NEXT: xvmaxsp vs0, v2, v3 295; PWR10BE-NEXT: xxswapd v2, vs0 296; PWR10BE-NEXT: xvmaxsp vs0, vs0, v2 297; PWR10BE-NEXT: xxspltw vs1, vs0, 1 298; PWR10BE-NEXT: xvmaxsp 
vs0, vs0, vs1 299; PWR10BE-NEXT: xscvspdpn f1, vs0 300; PWR10BE-NEXT: blr 301entry: 302 %0 = call fast float @llvm.vector.reduce.fmax.v8f32(<8 x float> %a) 303 ret float %0 304} 305 306define dso_local float @v16f32(<16 x float> %a) local_unnamed_addr #0 { 307; PWR9LE-LABEL: v16f32: 308; PWR9LE: # %bb.0: # %entry 309; PWR9LE-NEXT: xvmaxsp vs0, v3, v5 310; PWR9LE-NEXT: xvmaxsp vs1, v2, v4 311; PWR9LE-NEXT: xvmaxsp vs0, vs1, vs0 312; PWR9LE-NEXT: xxswapd vs1, vs0 313; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 3 314; PWR9LE-NEXT: xscvspdpn f1, vs1 315; PWR9LE-NEXT: xscvspdpn f2, vs2 316; PWR9LE-NEXT: xsmaxdp f1, f2, f1 317; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 1 318; PWR9LE-NEXT: xscvspdpn f0, vs0 319; PWR9LE-NEXT: xscvspdpn f2, vs2 320; PWR9LE-NEXT: xsmaxdp f1, f1, f2 321; PWR9LE-NEXT: xsmaxdp f1, f1, f0 322; PWR9LE-NEXT: blr 323; 324; PWR9BE-LABEL: v16f32: 325; PWR9BE: # %bb.0: # %entry 326; PWR9BE-NEXT: xvmaxsp vs0, v3, v5 327; PWR9BE-NEXT: xvmaxsp vs1, v2, v4 328; PWR9BE-NEXT: xvmaxsp vs0, vs1, vs0 329; PWR9BE-NEXT: xxsldwi vs2, vs0, vs0, 1 330; PWR9BE-NEXT: xscvspdpn f1, vs0 331; PWR9BE-NEXT: xscvspdpn f2, vs2 332; PWR9BE-NEXT: xsmaxdp f1, f1, f2 333; PWR9BE-NEXT: xxswapd vs2, vs0 334; PWR9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 335; PWR9BE-NEXT: xscvspdpn f2, vs2 336; PWR9BE-NEXT: xscvspdpn f0, vs0 337; PWR9BE-NEXT: xsmaxdp f1, f1, f2 338; PWR9BE-NEXT: xsmaxdp f1, f1, f0 339; PWR9BE-NEXT: blr 340; 341; PWR10LE-LABEL: v16f32: 342; PWR10LE: # %bb.0: # %entry 343; PWR10LE-NEXT: xvmaxsp vs0, v3, v5 344; PWR10LE-NEXT: xvmaxsp vs1, v2, v4 345; PWR10LE-NEXT: xvmaxsp vs0, vs1, vs0 346; PWR10LE-NEXT: xxswapd vs1, vs0 347; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 3 348; PWR10LE-NEXT: xscvspdpn f1, vs1 349; PWR10LE-NEXT: xscvspdpn f2, vs2 350; PWR10LE-NEXT: xsmaxdp f1, f2, f1 351; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 1 352; PWR10LE-NEXT: xscvspdpn f0, vs0 353; PWR10LE-NEXT: xscvspdpn f2, vs2 354; PWR10LE-NEXT: xsmaxdp f1, f1, f2 355; PWR10LE-NEXT: xsmaxdp f1, f1, f0 356; PWR10LE-NEXT: blr 
357; 358; PWR10BE-LABEL: v16f32: 359; PWR10BE: # %bb.0: # %entry 360; PWR10BE-NEXT: xvmaxsp vs0, v3, v5 361; PWR10BE-NEXT: xvmaxsp vs1, v2, v4 362; PWR10BE-NEXT: xvmaxsp vs0, vs1, vs0 363; PWR10BE-NEXT: xxsldwi vs2, vs0, vs0, 1 364; PWR10BE-NEXT: xscvspdpn f1, vs0 365; PWR10BE-NEXT: xscvspdpn f2, vs2 366; PWR10BE-NEXT: xsmaxdp f1, f1, f2 367; PWR10BE-NEXT: xxswapd vs2, vs0 368; PWR10BE-NEXT: xxsldwi vs0, vs0, vs0, 3 369; PWR10BE-NEXT: xscvspdpn f2, vs2 370; PWR10BE-NEXT: xscvspdpn f0, vs0 371; PWR10BE-NEXT: xsmaxdp f1, f1, f2 372; PWR10BE-NEXT: xsmaxdp f1, f1, f0 373; PWR10BE-NEXT: blr 374entry: 375 %0 = call float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a) 376 ret float %0 377} 378 379define dso_local float @v16f32_fast(<16 x float> %a) local_unnamed_addr #0 { 380; PWR9LE-LABEL: v16f32_fast: 381; PWR9LE: # %bb.0: # %entry 382; PWR9LE-NEXT: xvmaxsp vs0, v3, v5 383; PWR9LE-NEXT: xvmaxsp vs1, v2, v4 384; PWR9LE-NEXT: xvmaxsp vs0, vs1, vs0 385; PWR9LE-NEXT: xxswapd v2, vs0 386; PWR9LE-NEXT: xvmaxsp vs0, vs0, v2 387; PWR9LE-NEXT: xxspltw vs1, vs0, 2 388; PWR9LE-NEXT: xvmaxsp vs0, vs0, vs1 389; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 390; PWR9LE-NEXT: xscvspdpn f1, vs0 391; PWR9LE-NEXT: blr 392; 393; PWR9BE-LABEL: v16f32_fast: 394; PWR9BE: # %bb.0: # %entry 395; PWR9BE-NEXT: xvmaxsp vs0, v3, v5 396; PWR9BE-NEXT: xvmaxsp vs1, v2, v4 397; PWR9BE-NEXT: xvmaxsp vs0, vs1, vs0 398; PWR9BE-NEXT: xxswapd v2, vs0 399; PWR9BE-NEXT: xvmaxsp vs0, vs0, v2 400; PWR9BE-NEXT: xxspltw vs1, vs0, 1 401; PWR9BE-NEXT: xvmaxsp vs0, vs0, vs1 402; PWR9BE-NEXT: xscvspdpn f1, vs0 403; PWR9BE-NEXT: blr 404; 405; PWR10LE-LABEL: v16f32_fast: 406; PWR10LE: # %bb.0: # %entry 407; PWR10LE-NEXT: xvmaxsp vs0, v3, v5 408; PWR10LE-NEXT: xvmaxsp vs1, v2, v4 409; PWR10LE-NEXT: xvmaxsp vs0, vs1, vs0 410; PWR10LE-NEXT: xxswapd v2, vs0 411; PWR10LE-NEXT: xvmaxsp vs0, vs0, v2 412; PWR10LE-NEXT: xxspltw vs1, vs0, 2 413; PWR10LE-NEXT: xvmaxsp vs0, vs0, vs1 414; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3 415; 
PWR10LE-NEXT: xscvspdpn f1, vs0 416; PWR10LE-NEXT: blr 417; 418; PWR10BE-LABEL: v16f32_fast: 419; PWR10BE: # %bb.0: # %entry 420; PWR10BE-NEXT: xvmaxsp vs0, v3, v5 421; PWR10BE-NEXT: xvmaxsp vs1, v2, v4 422; PWR10BE-NEXT: xvmaxsp vs0, vs1, vs0 423; PWR10BE-NEXT: xxswapd v2, vs0 424; PWR10BE-NEXT: xvmaxsp vs0, vs0, v2 425; PWR10BE-NEXT: xxspltw vs1, vs0, 1 426; PWR10BE-NEXT: xvmaxsp vs0, vs0, vs1 427; PWR10BE-NEXT: xscvspdpn f1, vs0 428; PWR10BE-NEXT: blr 429entry: 430 %0 = call fast float @llvm.vector.reduce.fmax.v16f32(<16 x float> %a) 431 ret float %0 432} 433 434define dso_local float @v32f32(<32 x float> %a) local_unnamed_addr #0 { 435; PWR9LE-LABEL: v32f32: 436; PWR9LE: # %bb.0: # %entry 437; PWR9LE-NEXT: xvmaxsp vs0, v5, v9 438; PWR9LE-NEXT: xvmaxsp vs1, v3, v7 439; PWR9LE-NEXT: xvmaxsp vs2, v2, v6 440; PWR9LE-NEXT: xvmaxsp vs0, vs1, vs0 441; PWR9LE-NEXT: xvmaxsp vs1, v4, v8 442; PWR9LE-NEXT: xvmaxsp vs1, vs2, vs1 443; PWR9LE-NEXT: xvmaxsp vs0, vs1, vs0 444; PWR9LE-NEXT: xxswapd vs1, vs0 445; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 3 446; PWR9LE-NEXT: xscvspdpn f1, vs1 447; PWR9LE-NEXT: xscvspdpn f2, vs2 448; PWR9LE-NEXT: xsmaxdp f1, f2, f1 449; PWR9LE-NEXT: xxsldwi vs2, vs0, vs0, 1 450; PWR9LE-NEXT: xscvspdpn f0, vs0 451; PWR9LE-NEXT: xscvspdpn f2, vs2 452; PWR9LE-NEXT: xsmaxdp f1, f1, f2 453; PWR9LE-NEXT: xsmaxdp f1, f1, f0 454; PWR9LE-NEXT: blr 455; 456; PWR9BE-LABEL: v32f32: 457; PWR9BE: # %bb.0: # %entry 458; PWR9BE-NEXT: xvmaxsp vs0, v5, v9 459; PWR9BE-NEXT: xvmaxsp vs1, v3, v7 460; PWR9BE-NEXT: xvmaxsp vs2, v2, v6 461; PWR9BE-NEXT: xvmaxsp vs0, vs1, vs0 462; PWR9BE-NEXT: xvmaxsp vs1, v4, v8 463; PWR9BE-NEXT: xvmaxsp vs1, vs2, vs1 464; PWR9BE-NEXT: xvmaxsp vs0, vs1, vs0 465; PWR9BE-NEXT: xxsldwi vs2, vs0, vs0, 1 466; PWR9BE-NEXT: xscvspdpn f1, vs0 467; PWR9BE-NEXT: xscvspdpn f2, vs2 468; PWR9BE-NEXT: xsmaxdp f1, f1, f2 469; PWR9BE-NEXT: xxswapd vs2, vs0 470; PWR9BE-NEXT: xxsldwi vs0, vs0, vs0, 3 471; PWR9BE-NEXT: xscvspdpn f2, vs2 472; PWR9BE-NEXT: 
xscvspdpn f0, vs0 473; PWR9BE-NEXT: xsmaxdp f1, f1, f2 474; PWR9BE-NEXT: xsmaxdp f1, f1, f0 475; PWR9BE-NEXT: blr 476; 477; PWR10LE-LABEL: v32f32: 478; PWR10LE: # %bb.0: # %entry 479; PWR10LE-NEXT: xvmaxsp vs0, v5, v9 480; PWR10LE-NEXT: xvmaxsp vs1, v3, v7 481; PWR10LE-NEXT: xvmaxsp vs2, v2, v6 482; PWR10LE-NEXT: xvmaxsp vs0, vs1, vs0 483; PWR10LE-NEXT: xvmaxsp vs1, v4, v8 484; PWR10LE-NEXT: xvmaxsp vs1, vs2, vs1 485; PWR10LE-NEXT: xvmaxsp vs0, vs1, vs0 486; PWR10LE-NEXT: xxswapd vs1, vs0 487; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 3 488; PWR10LE-NEXT: xscvspdpn f1, vs1 489; PWR10LE-NEXT: xscvspdpn f2, vs2 490; PWR10LE-NEXT: xsmaxdp f1, f2, f1 491; PWR10LE-NEXT: xxsldwi vs2, vs0, vs0, 1 492; PWR10LE-NEXT: xscvspdpn f0, vs0 493; PWR10LE-NEXT: xscvspdpn f2, vs2 494; PWR10LE-NEXT: xsmaxdp f1, f1, f2 495; PWR10LE-NEXT: xsmaxdp f1, f1, f0 496; PWR10LE-NEXT: blr 497; 498; PWR10BE-LABEL: v32f32: 499; PWR10BE: # %bb.0: # %entry 500; PWR10BE-NEXT: xvmaxsp vs0, v5, v9 501; PWR10BE-NEXT: xvmaxsp vs1, v3, v7 502; PWR10BE-NEXT: xvmaxsp vs2, v2, v6 503; PWR10BE-NEXT: xvmaxsp vs0, vs1, vs0 504; PWR10BE-NEXT: xvmaxsp vs1, v4, v8 505; PWR10BE-NEXT: xvmaxsp vs1, vs2, vs1 506; PWR10BE-NEXT: xvmaxsp vs0, vs1, vs0 507; PWR10BE-NEXT: xxsldwi vs2, vs0, vs0, 1 508; PWR10BE-NEXT: xscvspdpn f1, vs0 509; PWR10BE-NEXT: xscvspdpn f2, vs2 510; PWR10BE-NEXT: xsmaxdp f1, f1, f2 511; PWR10BE-NEXT: xxswapd vs2, vs0 512; PWR10BE-NEXT: xxsldwi vs0, vs0, vs0, 3 513; PWR10BE-NEXT: xscvspdpn f2, vs2 514; PWR10BE-NEXT: xscvspdpn f0, vs0 515; PWR10BE-NEXT: xsmaxdp f1, f1, f2 516; PWR10BE-NEXT: xsmaxdp f1, f1, f0 517; PWR10BE-NEXT: blr 518entry: 519 %0 = call float @llvm.vector.reduce.fmax.v32f32(<32 x float> %a) 520 ret float %0 521} 522 523define dso_local float @v32f32_fast(<32 x float> %a) local_unnamed_addr #0 { 524; PWR9LE-LABEL: v32f32_fast: 525; PWR9LE: # %bb.0: # %entry 526; PWR9LE-NEXT: xvmaxsp vs0, v4, v8 527; PWR9LE-NEXT: xvmaxsp vs1, v2, v6 528; PWR9LE-NEXT: xvmaxsp vs2, v5, v9 529; PWR9LE-NEXT: 
xvmaxsp vs3, v3, v7 530; PWR9LE-NEXT: xvmaxsp vs2, vs3, vs2 531; PWR9LE-NEXT: xvmaxsp vs0, vs1, vs0 532; PWR9LE-NEXT: xvmaxsp vs0, vs0, vs2 533; PWR9LE-NEXT: xxswapd v2, vs0 534; PWR9LE-NEXT: xvmaxsp vs0, vs0, v2 535; PWR9LE-NEXT: xxspltw vs1, vs0, 2 536; PWR9LE-NEXT: xvmaxsp vs0, vs0, vs1 537; PWR9LE-NEXT: xxsldwi vs0, vs0, vs0, 3 538; PWR9LE-NEXT: xscvspdpn f1, vs0 539; PWR9LE-NEXT: blr 540; 541; PWR9BE-LABEL: v32f32_fast: 542; PWR9BE: # %bb.0: # %entry 543; PWR9BE-NEXT: xvmaxsp vs0, v4, v8 544; PWR9BE-NEXT: xvmaxsp vs1, v2, v6 545; PWR9BE-NEXT: xvmaxsp vs2, v5, v9 546; PWR9BE-NEXT: xvmaxsp vs3, v3, v7 547; PWR9BE-NEXT: xvmaxsp vs2, vs3, vs2 548; PWR9BE-NEXT: xvmaxsp vs0, vs1, vs0 549; PWR9BE-NEXT: xvmaxsp vs0, vs0, vs2 550; PWR9BE-NEXT: xxswapd v2, vs0 551; PWR9BE-NEXT: xvmaxsp vs0, vs0, v2 552; PWR9BE-NEXT: xxspltw vs1, vs0, 1 553; PWR9BE-NEXT: xvmaxsp vs0, vs0, vs1 554; PWR9BE-NEXT: xscvspdpn f1, vs0 555; PWR9BE-NEXT: blr 556; 557; PWR10LE-LABEL: v32f32_fast: 558; PWR10LE: # %bb.0: # %entry 559; PWR10LE-NEXT: xvmaxsp vs0, v4, v8 560; PWR10LE-NEXT: xvmaxsp vs1, v2, v6 561; PWR10LE-NEXT: xvmaxsp vs2, v5, v9 562; PWR10LE-NEXT: xvmaxsp vs3, v3, v7 563; PWR10LE-NEXT: xvmaxsp vs2, vs3, vs2 564; PWR10LE-NEXT: xvmaxsp vs0, vs1, vs0 565; PWR10LE-NEXT: xvmaxsp vs0, vs0, vs2 566; PWR10LE-NEXT: xxswapd v2, vs0 567; PWR10LE-NEXT: xvmaxsp vs0, vs0, v2 568; PWR10LE-NEXT: xxspltw vs1, vs0, 2 569; PWR10LE-NEXT: xvmaxsp vs0, vs0, vs1 570; PWR10LE-NEXT: xxsldwi vs0, vs0, vs0, 3 571; PWR10LE-NEXT: xscvspdpn f1, vs0 572; PWR10LE-NEXT: blr 573; 574; PWR10BE-LABEL: v32f32_fast: 575; PWR10BE: # %bb.0: # %entry 576; PWR10BE-NEXT: xvmaxsp vs0, v4, v8 577; PWR10BE-NEXT: xvmaxsp vs1, v2, v6 578; PWR10BE-NEXT: xvmaxsp vs2, v5, v9 579; PWR10BE-NEXT: xvmaxsp vs3, v3, v7 580; PWR10BE-NEXT: xvmaxsp vs2, vs3, vs2 581; PWR10BE-NEXT: xvmaxsp vs0, vs1, vs0 582; PWR10BE-NEXT: xvmaxsp vs0, vs0, vs2 583; PWR10BE-NEXT: xxswapd v2, vs0 584; PWR10BE-NEXT: xvmaxsp vs0, vs0, v2 585; PWR10BE-NEXT: xxspltw 
vs1, vs0, 1 586; PWR10BE-NEXT: xvmaxsp vs0, vs0, vs1 587; PWR10BE-NEXT: xscvspdpn f1, vs0 588; PWR10BE-NEXT: blr 589entry: 590 %0 = call fast float @llvm.vector.reduce.fmax.v32f32(<32 x float> %a) 591 ret float %0 592} 593 594declare float @llvm.vector.reduce.fmax.v2f32(<2 x float>) #0 595declare float @llvm.vector.reduce.fmax.v4f32(<4 x float>) #0 596declare float @llvm.vector.reduce.fmax.v8f32(<8 x float>) #0 597declare float @llvm.vector.reduce.fmax.v16f32(<16 x float>) #0 598declare float @llvm.vector.reduce.fmax.v32f32(<32 x float>) #0 599 600;; 601;; Vectors of f64 602;; 603define dso_local double @v2f64(<2 x double> %a) local_unnamed_addr #0 { 604; PWR9LE-LABEL: v2f64: 605; PWR9LE: # %bb.0: # %entry 606; PWR9LE-NEXT: xxswapd vs0, v2 607; PWR9LE-NEXT: xsmaxdp f1, f0, v2 608; PWR9LE-NEXT: blr 609; 610; PWR9BE-LABEL: v2f64: 611; PWR9BE: # %bb.0: # %entry 612; PWR9BE-NEXT: xxswapd vs0, v2 613; PWR9BE-NEXT: xsmaxdp f1, v2, f0 614; PWR9BE-NEXT: blr 615; 616; PWR10LE-LABEL: v2f64: 617; PWR10LE: # %bb.0: # %entry 618; PWR10LE-NEXT: xxswapd vs0, v2 619; PWR10LE-NEXT: xsmaxdp f1, f0, v2 620; PWR10LE-NEXT: blr 621; 622; PWR10BE-LABEL: v2f64: 623; PWR10BE: # %bb.0: # %entry 624; PWR10BE-NEXT: xxswapd vs0, v2 625; PWR10BE-NEXT: xsmaxdp f1, v2, f0 626; PWR10BE-NEXT: blr 627entry: 628 %0 = call double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a) 629 ret double %0 630} 631 632define dso_local double @v2f64_fast(<2 x double> %a) local_unnamed_addr #0 { 633; PWR9LE-LABEL: v2f64_fast: 634; PWR9LE: # %bb.0: # %entry 635; PWR9LE-NEXT: xxswapd vs0, v2 636; PWR9LE-NEXT: xvmaxdp vs0, v2, vs0 637; PWR9LE-NEXT: xxswapd vs1, vs0 638; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 639; PWR9LE-NEXT: blr 640; 641; PWR9BE-LABEL: v2f64_fast: 642; PWR9BE: # %bb.0: # %entry 643; PWR9BE-NEXT: xxswapd vs0, v2 644; PWR9BE-NEXT: xvmaxdp vs1, v2, vs0 645; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 646; PWR9BE-NEXT: blr 647; 648; PWR10LE-LABEL: v2f64_fast: 649; PWR10LE: # %bb.0: 
# %entry 650; PWR10LE-NEXT: xxswapd vs0, v2 651; PWR10LE-NEXT: xvmaxdp vs0, v2, vs0 652; PWR10LE-NEXT: xxswapd vs1, vs0 653; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 654; PWR10LE-NEXT: blr 655; 656; PWR10BE-LABEL: v2f64_fast: 657; PWR10BE: # %bb.0: # %entry 658; PWR10BE-NEXT: xxswapd vs0, v2 659; PWR10BE-NEXT: xvmaxdp vs1, v2, vs0 660; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 661; PWR10BE-NEXT: blr 662entry: 663 %0 = call fast double @llvm.vector.reduce.fmax.v2f64(<2 x double> %a) 664 ret double %0 665} 666 667define dso_local double @v4f64(<4 x double> %a) local_unnamed_addr #0 { 668; PWR9LE-LABEL: v4f64: 669; PWR9LE: # %bb.0: # %entry 670; PWR9LE-NEXT: xvmaxdp vs0, v2, v3 671; PWR9LE-NEXT: xxswapd vs1, vs0 672; PWR9LE-NEXT: xsmaxdp f1, f1, f0 673; PWR9LE-NEXT: blr 674; 675; PWR9BE-LABEL: v4f64: 676; PWR9BE: # %bb.0: # %entry 677; PWR9BE-NEXT: xvmaxdp vs0, v2, v3 678; PWR9BE-NEXT: xxswapd vs1, vs0 679; PWR9BE-NEXT: xsmaxdp f1, f0, f1 680; PWR9BE-NEXT: blr 681; 682; PWR10LE-LABEL: v4f64: 683; PWR10LE: # %bb.0: # %entry 684; PWR10LE-NEXT: xvmaxdp vs0, v2, v3 685; PWR10LE-NEXT: xxswapd vs1, vs0 686; PWR10LE-NEXT: xsmaxdp f1, f1, f0 687; PWR10LE-NEXT: blr 688; 689; PWR10BE-LABEL: v4f64: 690; PWR10BE: # %bb.0: # %entry 691; PWR10BE-NEXT: xvmaxdp vs0, v2, v3 692; PWR10BE-NEXT: xxswapd vs1, vs0 693; PWR10BE-NEXT: xsmaxdp f1, f0, f1 694; PWR10BE-NEXT: blr 695entry: 696 %0 = call double @llvm.vector.reduce.fmax.v4f64(<4 x double> %a) 697 ret double %0 698} 699 700define dso_local double @v4f64_fast(<4 x double> %a) local_unnamed_addr #0 { 701; PWR9LE-LABEL: v4f64_fast: 702; PWR9LE: # %bb.0: # %entry 703; PWR9LE-NEXT: xvmaxdp vs0, v2, v3 704; PWR9LE-NEXT: xxswapd vs1, vs0 705; PWR9LE-NEXT: xvmaxdp vs0, vs0, vs1 706; PWR9LE-NEXT: xxswapd vs1, vs0 707; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 708; PWR9LE-NEXT: blr 709; 710; PWR9BE-LABEL: v4f64_fast: 711; PWR9BE: # %bb.0: # %entry 712; PWR9BE-NEXT: xvmaxdp vs0, v2, v3 713; PWR9BE-NEXT: 
xxswapd vs1, vs0 714; PWR9BE-NEXT: xvmaxdp vs1, vs0, vs1 715; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 716; PWR9BE-NEXT: blr 717; 718; PWR10LE-LABEL: v4f64_fast: 719; PWR10LE: # %bb.0: # %entry 720; PWR10LE-NEXT: xvmaxdp vs0, v2, v3 721; PWR10LE-NEXT: xxswapd vs1, vs0 722; PWR10LE-NEXT: xvmaxdp vs0, vs0, vs1 723; PWR10LE-NEXT: xxswapd vs1, vs0 724; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 725; PWR10LE-NEXT: blr 726; 727; PWR10BE-LABEL: v4f64_fast: 728; PWR10BE: # %bb.0: # %entry 729; PWR10BE-NEXT: xvmaxdp vs0, v2, v3 730; PWR10BE-NEXT: xxswapd vs1, vs0 731; PWR10BE-NEXT: xvmaxdp vs1, vs0, vs1 732; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 733; PWR10BE-NEXT: blr 734entry: 735 %0 = call fast double @llvm.vector.reduce.fmax.v4f64(<4 x double> %a) 736 ret double %0 737} 738 739define dso_local double @v8f64(<8 x double> %a) local_unnamed_addr #0 { 740; PWR9LE-LABEL: v8f64: 741; PWR9LE: # %bb.0: # %entry 742; PWR9LE-NEXT: xvmaxdp vs0, v3, v5 743; PWR9LE-NEXT: xvmaxdp vs1, v2, v4 744; PWR9LE-NEXT: xvmaxdp vs0, vs1, vs0 745; PWR9LE-NEXT: xxswapd vs1, vs0 746; PWR9LE-NEXT: xsmaxdp f1, f1, f0 747; PWR9LE-NEXT: blr 748; 749; PWR9BE-LABEL: v8f64: 750; PWR9BE: # %bb.0: # %entry 751; PWR9BE-NEXT: xvmaxdp vs0, v3, v5 752; PWR9BE-NEXT: xvmaxdp vs1, v2, v4 753; PWR9BE-NEXT: xvmaxdp vs0, vs1, vs0 754; PWR9BE-NEXT: xxswapd vs1, vs0 755; PWR9BE-NEXT: xsmaxdp f1, f0, f1 756; PWR9BE-NEXT: blr 757; 758; PWR10LE-LABEL: v8f64: 759; PWR10LE: # %bb.0: # %entry 760; PWR10LE-NEXT: xvmaxdp vs0, v3, v5 761; PWR10LE-NEXT: xvmaxdp vs1, v2, v4 762; PWR10LE-NEXT: xvmaxdp vs0, vs1, vs0 763; PWR10LE-NEXT: xxswapd vs1, vs0 764; PWR10LE-NEXT: xsmaxdp f1, f1, f0 765; PWR10LE-NEXT: blr 766; 767; PWR10BE-LABEL: v8f64: 768; PWR10BE: # %bb.0: # %entry 769; PWR10BE-NEXT: xvmaxdp vs0, v3, v5 770; PWR10BE-NEXT: xvmaxdp vs1, v2, v4 771; PWR10BE-NEXT: xvmaxdp vs0, vs1, vs0 772; PWR10BE-NEXT: xxswapd vs1, vs0 773; PWR10BE-NEXT: xsmaxdp f1, f0, f1 774; PWR10BE-NEXT: blr 
775entry: 776 %0 = call double @llvm.vector.reduce.fmax.v8f64(<8 x double> %a) 777 ret double %0 778} 779 780define dso_local double @v8f64_fast(<8 x double> %a) local_unnamed_addr #0 { 781; PWR9LE-LABEL: v8f64_fast: 782; PWR9LE: # %bb.0: # %entry 783; PWR9LE-NEXT: xvmaxdp vs0, v3, v5 784; PWR9LE-NEXT: xvmaxdp vs1, v2, v4 785; PWR9LE-NEXT: xvmaxdp vs0, vs1, vs0 786; PWR9LE-NEXT: xxswapd vs1, vs0 787; PWR9LE-NEXT: xvmaxdp vs0, vs0, vs1 788; PWR9LE-NEXT: xxswapd vs1, vs0 789; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 790; PWR9LE-NEXT: blr 791; 792; PWR9BE-LABEL: v8f64_fast: 793; PWR9BE: # %bb.0: # %entry 794; PWR9BE-NEXT: xvmaxdp vs0, v3, v5 795; PWR9BE-NEXT: xvmaxdp vs1, v2, v4 796; PWR9BE-NEXT: xvmaxdp vs0, vs1, vs0 797; PWR9BE-NEXT: xxswapd vs1, vs0 798; PWR9BE-NEXT: xvmaxdp vs1, vs0, vs1 799; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 800; PWR9BE-NEXT: blr 801; 802; PWR10LE-LABEL: v8f64_fast: 803; PWR10LE: # %bb.0: # %entry 804; PWR10LE-NEXT: xvmaxdp vs0, v3, v5 805; PWR10LE-NEXT: xvmaxdp vs1, v2, v4 806; PWR10LE-NEXT: xvmaxdp vs0, vs1, vs0 807; PWR10LE-NEXT: xxswapd vs1, vs0 808; PWR10LE-NEXT: xvmaxdp vs0, vs0, vs1 809; PWR10LE-NEXT: xxswapd vs1, vs0 810; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 811; PWR10LE-NEXT: blr 812; 813; PWR10BE-LABEL: v8f64_fast: 814; PWR10BE: # %bb.0: # %entry 815; PWR10BE-NEXT: xvmaxdp vs0, v3, v5 816; PWR10BE-NEXT: xvmaxdp vs1, v2, v4 817; PWR10BE-NEXT: xvmaxdp vs0, vs1, vs0 818; PWR10BE-NEXT: xxswapd vs1, vs0 819; PWR10BE-NEXT: xvmaxdp vs1, vs0, vs1 820; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1 821; PWR10BE-NEXT: blr 822entry: 823 %0 = call fast double @llvm.vector.reduce.fmax.v8f64(<8 x double> %a) 824 ret double %0 825} 826 827define dso_local double @v16f64(<16 x double> %a) local_unnamed_addr #0 { 828; PWR9LE-LABEL: v16f64: 829; PWR9LE: # %bb.0: # %entry 830; PWR9LE-NEXT: xvmaxdp vs0, v5, v9 831; PWR9LE-NEXT: xvmaxdp vs1, v3, v7 832; PWR9LE-NEXT: xvmaxdp vs2, v2, v6 833; PWR9LE-NEXT: 
xvmaxdp vs0, vs1, vs0 834; PWR9LE-NEXT: xvmaxdp vs1, v4, v8 835; PWR9LE-NEXT: xvmaxdp vs1, vs2, vs1 836; PWR9LE-NEXT: xvmaxdp vs0, vs1, vs0 837; PWR9LE-NEXT: xxswapd vs1, vs0 838; PWR9LE-NEXT: xsmaxdp f1, f1, f0 839; PWR9LE-NEXT: blr 840; 841; PWR9BE-LABEL: v16f64: 842; PWR9BE: # %bb.0: # %entry 843; PWR9BE-NEXT: xvmaxdp vs0, v5, v9 844; PWR9BE-NEXT: xvmaxdp vs1, v3, v7 845; PWR9BE-NEXT: xvmaxdp vs2, v2, v6 846; PWR9BE-NEXT: xvmaxdp vs0, vs1, vs0 847; PWR9BE-NEXT: xvmaxdp vs1, v4, v8 848; PWR9BE-NEXT: xvmaxdp vs1, vs2, vs1 849; PWR9BE-NEXT: xvmaxdp vs0, vs1, vs0 850; PWR9BE-NEXT: xxswapd vs1, vs0 851; PWR9BE-NEXT: xsmaxdp f1, f0, f1 852; PWR9BE-NEXT: blr 853; 854; PWR10LE-LABEL: v16f64: 855; PWR10LE: # %bb.0: # %entry 856; PWR10LE-NEXT: xvmaxdp vs0, v5, v9 857; PWR10LE-NEXT: xvmaxdp vs1, v3, v7 858; PWR10LE-NEXT: xvmaxdp vs2, v2, v6 859; PWR10LE-NEXT: xvmaxdp vs0, vs1, vs0 860; PWR10LE-NEXT: xvmaxdp vs1, v4, v8 861; PWR10LE-NEXT: xvmaxdp vs1, vs2, vs1 862; PWR10LE-NEXT: xvmaxdp vs0, vs1, vs0 863; PWR10LE-NEXT: xxswapd vs1, vs0 864; PWR10LE-NEXT: xsmaxdp f1, f1, f0 865; PWR10LE-NEXT: blr 866; 867; PWR10BE-LABEL: v16f64: 868; PWR10BE: # %bb.0: # %entry 869; PWR10BE-NEXT: xvmaxdp vs0, v5, v9 870; PWR10BE-NEXT: xvmaxdp vs1, v3, v7 871; PWR10BE-NEXT: xvmaxdp vs2, v2, v6 872; PWR10BE-NEXT: xvmaxdp vs0, vs1, vs0 873; PWR10BE-NEXT: xvmaxdp vs1, v4, v8 874; PWR10BE-NEXT: xvmaxdp vs1, vs2, vs1 875; PWR10BE-NEXT: xvmaxdp vs0, vs1, vs0 876; PWR10BE-NEXT: xxswapd vs1, vs0 877; PWR10BE-NEXT: xsmaxdp f1, f0, f1 878; PWR10BE-NEXT: blr 879entry: 880 %0 = call double @llvm.vector.reduce.fmax.v16f64(<16 x double> %a) 881 ret double %0 882} 883 884define dso_local double @v16f64_fast(<16 x double> %a) local_unnamed_addr #0 { 885; PWR9LE-LABEL: v16f64_fast: 886; PWR9LE: # %bb.0: # %entry 887; PWR9LE-NEXT: xvmaxdp vs0, v4, v8 888; PWR9LE-NEXT: xvmaxdp vs1, v2, v6 889; PWR9LE-NEXT: xvmaxdp vs2, v5, v9 890; PWR9LE-NEXT: xvmaxdp vs3, v3, v7 891; PWR9LE-NEXT: xvmaxdp vs2, vs3, vs2 892; 
; PWR9LE-NEXT: xvmaxdp vs0, vs1, vs0
; PWR9LE-NEXT: xvmaxdp vs0, vs0, vs2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvmaxdp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v16f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: xvmaxdp vs0, v4, v8
; PWR9BE-NEXT: xvmaxdp vs1, v2, v6
; PWR9BE-NEXT: xvmaxdp vs2, v5, v9
; PWR9BE-NEXT: xvmaxdp vs3, v3, v7
; PWR9BE-NEXT: xvmaxdp vs2, vs3, vs2
; PWR9BE-NEXT: xvmaxdp vs0, vs1, vs0
; PWR9BE-NEXT: xvmaxdp vs0, vs0, vs2
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvmaxdp vs1, vs0, vs1
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v16f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: xvmaxdp vs0, v4, v8
; PWR10LE-NEXT: xvmaxdp vs1, v2, v6
; PWR10LE-NEXT: xvmaxdp vs2, v5, v9
; PWR10LE-NEXT: xvmaxdp vs3, v3, v7
; PWR10LE-NEXT: xvmaxdp vs2, vs3, vs2
; PWR10LE-NEXT: xvmaxdp vs0, vs1, vs0
; PWR10LE-NEXT: xvmaxdp vs0, vs0, vs2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvmaxdp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v16f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: xvmaxdp vs0, v4, v8
; PWR10BE-NEXT: xvmaxdp vs1, v2, v6
; PWR10BE-NEXT: xvmaxdp vs2, v5, v9
; PWR10BE-NEXT: xvmaxdp vs3, v3, v7
; PWR10BE-NEXT: xvmaxdp vs2, vs3, vs2
; PWR10BE-NEXT: xvmaxdp vs0, vs1, vs0
; PWR10BE-NEXT: xvmaxdp vs0, vs0, vs2
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvmaxdp vs1, vs0, vs1
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
  %0 = call fast double @llvm.vector.reduce.fmax.v16f64(<16 x double> %a)
  ret double %0
}

;; v32f64: fmax reduction over a <32 x double> argument, with no fast-math
;; flags on the intrinsic call.
;; For each of the four check prefixes the expected code contains four lxv
;; loads from r1-relative offsets (224..272 under the little-endian prefixes,
;; 240..288 under the big-endian ones), a chain of xvmaxdp instructions, and
;; finally xxswapd followed by a single scalar xsmaxdp.
;; The check lines are autogenerated (see the NOTE at the top of this file).
define dso_local double @v32f64(<32 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32f64:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: lxv vs3, 272(r1)
; PWR9LE-NEXT: lxv vs2, 240(r1)
; PWR9LE-NEXT: xvmaxdp vs4, v5, v13
; PWR9LE-NEXT: lxv vs1, 256(r1)
; PWR9LE-NEXT: lxv vs0, 224(r1)
; PWR9LE-NEXT: xvmaxdp vs3, v9, vs3
; PWR9LE-NEXT: xvmaxdp vs2, v7, vs2
; PWR9LE-NEXT: xvmaxdp vs1, v8, vs1
; PWR9LE-NEXT: xvmaxdp vs0, v6, vs0
; PWR9LE-NEXT: xvmaxdp vs3, vs4, vs3
; PWR9LE-NEXT: xvmaxdp vs4, v3, v11
; PWR9LE-NEXT: xvmaxdp vs2, vs4, vs2
; PWR9LE-NEXT: xvmaxdp vs2, vs2, vs3
; PWR9LE-NEXT: xvmaxdp vs3, v4, v12
; PWR9LE-NEXT: xvmaxdp vs1, vs3, vs1
; PWR9LE-NEXT: xvmaxdp vs3, v2, v10
; PWR9LE-NEXT: xvmaxdp vs0, vs3, vs0
; PWR9LE-NEXT: xvmaxdp vs0, vs0, vs1
; PWR9LE-NEXT: xvmaxdp vs0, vs0, vs2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xsmaxdp f1, f1, f0
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v32f64:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: lxv vs3, 288(r1)
; PWR9BE-NEXT: lxv vs2, 256(r1)
; PWR9BE-NEXT: xvmaxdp vs4, v5, v13
; PWR9BE-NEXT: lxv vs1, 272(r1)
; PWR9BE-NEXT: lxv vs0, 240(r1)
; PWR9BE-NEXT: xvmaxdp vs3, v9, vs3
; PWR9BE-NEXT: xvmaxdp vs2, v7, vs2
; PWR9BE-NEXT: xvmaxdp vs1, v8, vs1
; PWR9BE-NEXT: xvmaxdp vs0, v6, vs0
; PWR9BE-NEXT: xvmaxdp vs3, vs4, vs3
; PWR9BE-NEXT: xvmaxdp vs4, v3, v11
; PWR9BE-NEXT: xvmaxdp vs2, vs4, vs2
; PWR9BE-NEXT: xvmaxdp vs2, vs2, vs3
; PWR9BE-NEXT: xvmaxdp vs3, v4, v12
; PWR9BE-NEXT: xvmaxdp vs1, vs3, vs1
; PWR9BE-NEXT: xvmaxdp vs3, v2, v10
; PWR9BE-NEXT: xvmaxdp vs0, vs3, vs0
; PWR9BE-NEXT: xvmaxdp vs0, vs0, vs1
; PWR9BE-NEXT: xvmaxdp vs0, vs0, vs2
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xsmaxdp f1, f0, f1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v32f64:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: lxv vs3, 272(r1)
; PWR10LE-NEXT: lxv vs2, 240(r1)
; PWR10LE-NEXT: xvmaxdp vs4, v5, v13
; PWR10LE-NEXT: xvmaxdp vs3, v9, vs3
; PWR10LE-NEXT: lxv vs1, 256(r1)
; PWR10LE-NEXT: xvmaxdp vs2, v7, vs2
; PWR10LE-NEXT: lxv vs0, 224(r1)
; PWR10LE-NEXT: xvmaxdp vs1, v8, vs1
; PWR10LE-NEXT: xvmaxdp vs0, v6, vs0
; PWR10LE-NEXT: xvmaxdp vs3, vs4, vs3
; PWR10LE-NEXT: xvmaxdp vs4, v3, v11
; PWR10LE-NEXT: xvmaxdp vs2, vs4, vs2
; PWR10LE-NEXT: xvmaxdp vs2, vs2, vs3
; PWR10LE-NEXT: xvmaxdp vs3, v4, v12
; PWR10LE-NEXT: xvmaxdp vs1, vs3, vs1
; PWR10LE-NEXT: xvmaxdp vs3, v2, v10
; PWR10LE-NEXT: xvmaxdp vs0, vs3, vs0
; PWR10LE-NEXT: xvmaxdp vs0, vs0, vs1
; PWR10LE-NEXT: xvmaxdp vs0, vs0, vs2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xsmaxdp f1, f1, f0
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v32f64:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: lxv vs3, 288(r1)
; PWR10BE-NEXT: lxv vs2, 256(r1)
; PWR10BE-NEXT: xvmaxdp vs4, v5, v13
; PWR10BE-NEXT: xvmaxdp vs3, v9, vs3
; PWR10BE-NEXT: lxv vs1, 272(r1)
; PWR10BE-NEXT: xvmaxdp vs2, v7, vs2
; PWR10BE-NEXT: lxv vs0, 240(r1)
; PWR10BE-NEXT: xvmaxdp vs1, v8, vs1
; PWR10BE-NEXT: xvmaxdp vs0, v6, vs0
; PWR10BE-NEXT: xvmaxdp vs3, vs4, vs3
; PWR10BE-NEXT: xvmaxdp vs4, v3, v11
; PWR10BE-NEXT: xvmaxdp vs2, vs4, vs2
; PWR10BE-NEXT: xvmaxdp vs2, vs2, vs3
; PWR10BE-NEXT: xvmaxdp vs3, v4, v12
; PWR10BE-NEXT: xvmaxdp vs1, vs3, vs1
; PWR10BE-NEXT: xvmaxdp vs3, v2, v10
; PWR10BE-NEXT: xvmaxdp vs0, vs3, vs0
; PWR10BE-NEXT: xvmaxdp vs0, vs0, vs1
; PWR10BE-NEXT: xvmaxdp vs0, vs0, vs2
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xsmaxdp f1, f0, f1
; PWR10BE-NEXT: blr
entry:
  %max = call double @llvm.vector.reduce.fmax.v32f64(<32 x double> %a)
  ret double %max
}

;; v32f64_fast: the same <32 x double> fmax reduction, but the intrinsic call
;; carries the `fast` flag.
;; The expected code again has four lxv loads and a chain of xvmaxdp, but it
;; finishes with xxswapd + xvmaxdp and has no scalar xsmaxdp. The little-endian
;; prefixes expect one additional xxswapd before the kill annotation and blr.
;; The check lines are autogenerated (see the NOTE at the top of this file).
define dso_local double @v32f64_fast(<32 x double> %a) local_unnamed_addr #0 {
; PWR9LE-LABEL: v32f64_fast:
; PWR9LE: # %bb.0: # %entry
; PWR9LE-NEXT: lxv vs0, 256(r1)
; PWR9LE-NEXT: lxv vs1, 224(r1)
; PWR9LE-NEXT: lxv vs2, 272(r1)
; PWR9LE-NEXT: lxv vs3, 240(r1)
; PWR9LE-NEXT: xvmaxdp vs4, v3, v11
; PWR9LE-NEXT: xvmaxdp vs5, v5, v13
; PWR9LE-NEXT: xvmaxdp vs6, v2, v10
; PWR9LE-NEXT: xvmaxdp vs7, v4, v12
; PWR9LE-NEXT: xvmaxdp vs3, v7, vs3
; PWR9LE-NEXT: xvmaxdp vs2, v9, vs2
; PWR9LE-NEXT: xvmaxdp vs1, v6, vs1
; PWR9LE-NEXT: xvmaxdp vs0, v8, vs0
; PWR9LE-NEXT: xvmaxdp vs0, vs7, vs0
; PWR9LE-NEXT: xvmaxdp vs1, vs6, vs1
; PWR9LE-NEXT: xvmaxdp vs2, vs5, vs2
; PWR9LE-NEXT: xvmaxdp vs3, vs4, vs3
; PWR9LE-NEXT: xvmaxdp vs2, vs3, vs2
; PWR9LE-NEXT: xvmaxdp vs0, vs1, vs0
; PWR9LE-NEXT: xvmaxdp vs0, vs0, vs2
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: xvmaxdp vs0, vs0, vs1
; PWR9LE-NEXT: xxswapd vs1, vs0
; PWR9LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9LE-NEXT: blr
;
; PWR9BE-LABEL: v32f64_fast:
; PWR9BE: # %bb.0: # %entry
; PWR9BE-NEXT: lxv vs0, 272(r1)
; PWR9BE-NEXT: lxv vs1, 240(r1)
; PWR9BE-NEXT: lxv vs2, 288(r1)
; PWR9BE-NEXT: lxv vs3, 256(r1)
; PWR9BE-NEXT: xvmaxdp vs4, v3, v11
; PWR9BE-NEXT: xvmaxdp vs5, v5, v13
; PWR9BE-NEXT: xvmaxdp vs6, v2, v10
; PWR9BE-NEXT: xvmaxdp vs7, v4, v12
; PWR9BE-NEXT: xvmaxdp vs3, v7, vs3
; PWR9BE-NEXT: xvmaxdp vs2, v9, vs2
; PWR9BE-NEXT: xvmaxdp vs1, v6, vs1
; PWR9BE-NEXT: xvmaxdp vs0, v8, vs0
; PWR9BE-NEXT: xvmaxdp vs0, vs7, vs0
; PWR9BE-NEXT: xvmaxdp vs1, vs6, vs1
; PWR9BE-NEXT: xvmaxdp vs2, vs5, vs2
; PWR9BE-NEXT: xvmaxdp vs3, vs4, vs3
; PWR9BE-NEXT: xvmaxdp vs2, vs3, vs2
; PWR9BE-NEXT: xvmaxdp vs0, vs1, vs0
; PWR9BE-NEXT: xvmaxdp vs0, vs0, vs2
; PWR9BE-NEXT: xxswapd vs1, vs0
; PWR9BE-NEXT: xvmaxdp vs1, vs0, vs1
; PWR9BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR9BE-NEXT: blr
;
; PWR10LE-LABEL: v32f64_fast:
; PWR10LE: # %bb.0: # %entry
; PWR10LE-NEXT: lxv vs0, 256(r1)
; PWR10LE-NEXT: lxv vs1, 224(r1)
; PWR10LE-NEXT: xvmaxdp vs4, v3, v11
; PWR10LE-NEXT: xvmaxdp vs5, v5, v13
; PWR10LE-NEXT: xvmaxdp vs6, v2, v10
; PWR10LE-NEXT: xvmaxdp vs7, v4, v12
; PWR10LE-NEXT: xvmaxdp vs1, v6, vs1
; PWR10LE-NEXT: lxv vs2, 272(r1)
; PWR10LE-NEXT: lxv vs3, 240(r1)
; PWR10LE-NEXT: xvmaxdp vs3, v7, vs3
; PWR10LE-NEXT: xvmaxdp vs2, v9, vs2
; PWR10LE-NEXT: xvmaxdp vs0, v8, vs0
; PWR10LE-NEXT: xvmaxdp vs0, vs7, vs0
; PWR10LE-NEXT: xvmaxdp vs1, vs6, vs1
; PWR10LE-NEXT: xvmaxdp vs2, vs5, vs2
; PWR10LE-NEXT: xvmaxdp vs3, vs4, vs3
; PWR10LE-NEXT: xvmaxdp vs2, vs3, vs2
; PWR10LE-NEXT: xvmaxdp vs0, vs1, vs0
; PWR10LE-NEXT: xvmaxdp vs0, vs0, vs2
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: xvmaxdp vs0, vs0, vs1
; PWR10LE-NEXT: xxswapd vs1, vs0
; PWR10LE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10LE-NEXT: blr
;
; PWR10BE-LABEL: v32f64_fast:
; PWR10BE: # %bb.0: # %entry
; PWR10BE-NEXT: lxv vs0, 272(r1)
; PWR10BE-NEXT: lxv vs1, 240(r1)
; PWR10BE-NEXT: xvmaxdp vs4, v3, v11
; PWR10BE-NEXT: xvmaxdp vs5, v5, v13
; PWR10BE-NEXT: xvmaxdp vs6, v2, v10
; PWR10BE-NEXT: xvmaxdp vs7, v4, v12
; PWR10BE-NEXT: xvmaxdp vs1, v6, vs1
; PWR10BE-NEXT: lxv vs2, 288(r1)
; PWR10BE-NEXT: lxv vs3, 256(r1)
; PWR10BE-NEXT: xvmaxdp vs3, v7, vs3
; PWR10BE-NEXT: xvmaxdp vs2, v9, vs2
; PWR10BE-NEXT: xvmaxdp vs0, v8, vs0
; PWR10BE-NEXT: xvmaxdp vs0, vs7, vs0
; PWR10BE-NEXT: xvmaxdp vs1, vs6, vs1
; PWR10BE-NEXT: xvmaxdp vs2, vs5, vs2
; PWR10BE-NEXT: xvmaxdp vs3, vs4, vs3
; PWR10BE-NEXT: xvmaxdp vs2, vs3, vs2
; PWR10BE-NEXT: xvmaxdp vs0, vs1, vs0
; PWR10BE-NEXT: xvmaxdp vs0, vs0, vs2
; PWR10BE-NEXT: xxswapd vs1, vs0
; PWR10BE-NEXT: xvmaxdp vs1, vs0, vs1
; PWR10BE-NEXT: # kill: def $f1 killed $f1 killed $vsl1
; PWR10BE-NEXT: blr
entry:
  %max = call fast double @llvm.vector.reduce.fmax.v32f64(<32 x double> %a)
  ret double %max
}

;; Declarations of the llvm.vector.reduce.fmax intrinsics for <2 x double>
;; through <32 x double>.
declare double @llvm.vector.reduce.fmax.v2f64(<2 x double>) #0
declare double @llvm.vector.reduce.fmax.v4f64(<4 x double>) #0
declare double @llvm.vector.reduce.fmax.v8f64(<8 x double>) #0
declare double @llvm.vector.reduce.fmax.v16f64(<16 x double>) #0
declare double @llvm.vector.reduce.fmax.v32f64(<32 x double>) #0

;; Attribute group #0, referenced by the definitions and declarations above.
attributes #0 = { nounwind }