; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s \
; RUN:   -check-prefix=CHECK-P8

; Code generation tests for fp128 operations on powerpc64le.
; Every function is compiled twice: once with -mcpu=pwr9 (assertions under the
; default FileCheck prefix) and once with -mcpu=pwr8 (assertions under the
; P8-suffixed prefix). The assertion lines are tool-generated; regenerate them
; with the script named in the NOTE above instead of editing them by hand.

; fadd of an fp128 with itself. The pwr9 run expects a single xsaddqp; the pwr8
; run expects a call to __addkf3.
; Function Attrs: norecurse nounwind
define dso_local void @qpAdd(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpAdd:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsaddqp v2, v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpAdd:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: bl __addkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %add = fadd fp128 %0, %0
  store fp128 %add, fp128* %res, align 16
  ret void
}

; fsub of an fp128 with itself. The pwr9 run expects xssubqp; the pwr8 run
; expects a call to __subkf3.
; Function Attrs: norecurse nounwind
define dso_local void @qpSub(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpSub:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xssubqp v2, v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpSub:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: bl __subkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %sub = fsub fp128 %0, %0
  store fp128 %sub, fp128* %res, align 16
  ret void
}

; fmul of an fp128 with itself. The pwr9 run expects xsmulqp; the pwr8 run
; expects a call to __mulkf3.
; Function Attrs: norecurse nounwind
define dso_local void @qpMul(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpMul:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsmulqp v2, v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpMul:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: bl __mulkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %mul = fmul fp128 %0, %0
  store fp128 %mul, fp128* %res, align 16
  ret void
}

; fdiv of an fp128 by itself. The pwr9 run expects xsdivqp; the pwr8 run
; expects a call to __divkf3.
; Function Attrs: norecurse nounwind
define dso_local void @qpDiv(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpDiv:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsdivqp v2, v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpDiv:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: vmr v3, v2
; CHECK-P8-NEXT: bl __divkf3
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %div = fdiv fp128 %0, %0
  store fp128 %div, fp128* %res, align 16
  ret void
}

; Plain fp128 load from %PtrC + 4 and store to %PtrF + 8, with no arithmetic.
; The pwr9 run expects lxvx/stxvx; the pwr8 run expects lxvd2x/stxvd2x.
define dso_local void @testLdNSt(i8* nocapture readonly %PtrC, fp128* nocapture %PtrF) {
; CHECK-LABEL: testLdNSt:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: li r5, 4
; CHECK-NEXT: lxvx vs0, r3, r5
; CHECK-NEXT: li r3, 8
; CHECK-NEXT: stxvx vs0, r4, r3
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: testLdNSt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: addi r3, r3, 4
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r3, r4, 8
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: blr
entry:
  %add.ptr = getelementptr inbounds i8, i8* %PtrC, i64 4
  %0 = bitcast i8* %add.ptr to fp128*
  %1 = load fp128, fp128* %0, align 16
  %2 = bitcast fp128* %PtrF to i8*
  %add.ptr1 = getelementptr inbounds i8, i8* %2, i64 8
  %3 = bitcast i8* %add.ptr1 to fp128*
  store fp128 %1, fp128* %3, align 16
  ret void
}

; llvm.sqrt on fp128. The pwr9 run expects xssqrtqp; the pwr8 run expects a
; call to sqrtf128.
define dso_local void @qpSqrt(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpSqrt:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xssqrtqp v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpSqrt:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl sqrtf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.sqrt.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void

}
declare fp128 @llvm.sqrt.f128(fp128 %Val)

; llvm.copysign on fp128. The pwr9 run expects xscpsgnqp; the pwr8 run expects
; an inline byte-level sequence through stack temporaries, with no call.
define dso_local void @qpCpsgn(fp128* nocapture readonly %a, fp128* nocapture readonly %b,
; CHECK-LABEL: qpCpsgn:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: xscpsgnqp v2, v3, v2
; CHECK-NEXT: stxv v2, 0(r5)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpCpsgn:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: lxvd2x vs0, 0, r4
; CHECK-P8-NEXT: addi r4, r1, -16
; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r3, r1, -32
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r4, -1(r1)
; CHECK-P8-NEXT: lbz r6, -17(r1)
; CHECK-P8-NEXT: rlwimi r6, r4, 0, 0, 24
; CHECK-P8-NEXT: stb r6, -17(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs0, 0, r5
; CHECK-P8-NEXT: blr
                              fp128* nocapture %res) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.copysign.f128(fp128 %0, fp128 %1)
  store fp128 %2, fp128* %res, align 16
  ret void

}
declare fp128 @llvm.copysign.f128(fp128 %Mag, fp128 %Sgn)

; llvm.fabs on fp128. The pwr9 run expects xsabsqp; the pwr8 run expects an
; inline lbz/clrlwi/stb sequence on a stack copy, with no call.
define dso_local void @qpAbs(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpAbs:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsabsqp v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpAbs:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r3, r1, -16
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r5, -1(r1)
; CHECK-P8-NEXT: clrlwi r5, r5, 25
; CHECK-P8-NEXT: stb r5, -1(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void

}
declare fp128 @llvm.fabs.f128(fp128 %Val)

; Negated llvm.fabs on fp128 (fsub from -0.0). The pwr9 run expects xsnabsqp;
; the pwr8 run expects two inline byte-level steps (clrlwi, then xori 128) on
; stack copies, with no call.
define dso_local void @qpNAbs(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpNAbs:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsnabsqp v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpNAbs:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r3, r1, -32
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r5, -17(r1)
; CHECK-P8-NEXT: clrlwi r5, r5, 25
; CHECK-P8-NEXT: stb r5, -17(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r3, r1, -16
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r5, -1(r1)
; CHECK-P8-NEXT: xori r5, r5, 128
; CHECK-P8-NEXT: stb r5, -1(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
  %neg = fsub fp128 0xL00000000000000008000000000000000, %1
  store fp128 %neg, fp128* %res, align 16
  ret void

}

; fp128 negation written as fsub from -0.0. The pwr9 run expects xsnegqp; the
; pwr8 run expects an inline lbz/xori/stb sequence on a stack copy.
define dso_local void @qpNeg(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpNeg:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsnegqp v2, v2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpNeg:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: addi r3, r1, -16
; CHECK-P8-NEXT: stxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lbz r5, -1(r1)
; CHECK-P8-NEXT: xori r5, r5, 128
; CHECK-P8-NEXT: stb r5, -1(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: stxvd2x vs0, 0, r4
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %sub = fsub fp128 0xL00000000000000008000000000000000, %0
  store fp128 %sub, fp128* %res, align 16
  ret void

}

; llvm.sin on fp128. Both runs expect a call to sinf128.
define fp128 @qp_sin(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_sin:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl sinf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_sin:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl sinf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.sin.f128(fp128 %0)
  ret fp128 %1
}
declare fp128 @llvm.sin.f128(fp128 %Val)

; llvm.cos on fp128. Both runs expect a call to cosf128.
define fp128 @qp_cos(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_cos:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl cosf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_cos:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl cosf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.cos.f128(fp128 %0)
  ret fp128 %1
}
declare fp128 @llvm.cos.f128(fp128 %Val)

; llvm.log on fp128. Both runs expect a call to logf128.
define fp128 @qp_log(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_log:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl logf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_log:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl logf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.log.f128(fp128 %0)
  ret fp128 %1
}
declare fp128 @llvm.log.f128(fp128 %Val)

; llvm.log10 on fp128. Both runs expect a call to log10f128.
define fp128 @qp_log10(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_log10:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl log10f128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_log10:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl log10f128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.log10.f128(fp128 %0)
  ret fp128 %1
}
declare fp128 @llvm.log10.f128(fp128 %Val)

; llvm.log2 on fp128. Both runs expect a call to log2f128.
define fp128 @qp_log2(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_log2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl log2f128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_log2:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl log2f128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.log2.f128(fp128 %0)
  ret fp128 %1
}
declare fp128 @llvm.log2.f128(fp128 %Val)

; llvm.minnum on two fp128 values. Both runs expect a call to fminf128.
define fp128 @qp_minnum(fp128* nocapture readonly %a,
; CHECK-LABEL: qp_minnum:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: bl fminf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_minnum:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl fminf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
                        fp128* nocapture readonly %b) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.minnum.f128(fp128 %0, fp128 %1)
  ret fp128 %2
}
declare fp128 @llvm.minnum.f128(fp128 %Val0, fp128 %Val1)

; llvm.maxnum on two fp128 values. Both runs expect a call to fmaxf128.
define fp128 @qp_maxnum(fp128* nocapture readonly %a,
; CHECK-LABEL: qp_maxnum:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: bl fmaxf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_maxnum:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl fmaxf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
                        fp128* nocapture readonly %b) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.maxnum.f128(fp128 %0, fp128 %1)
  ret fp128 %2
}
declare fp128 @llvm.maxnum.f128(fp128 %Val0, fp128 %Val1)

; llvm.pow on two fp128 values. Both runs expect a call to powf128.
define fp128 @qp_pow(fp128* nocapture readonly %a,
; CHECK-LABEL: qp_pow:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: lxv v3, 0(r4)
; CHECK-NEXT: bl powf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_pow:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl powf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
                     fp128* nocapture readonly %b) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.pow.f128(fp128 %0, fp128 %1)
  ret fp128 %2
}
declare fp128 @llvm.pow.f128(fp128 %Val, fp128 %Power)

; llvm.exp on fp128. Both runs expect a call to expf128.
define fp128 @qp_exp(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_exp:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl expf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_exp:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl expf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.exp.f128(fp128 %0)
  ret fp128 %1
}
declare fp128 @llvm.exp.f128(fp128 %Val)

; llvm.exp2 on fp128. Both runs expect a call to exp2f128.
define fp128 @qp_exp2(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_exp2:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: bl exp2f128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_exp2:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl exp2f128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.exp2.f128(fp128 %0)
  ret fp128 %1
}
declare fp128 @llvm.exp2.f128(fp128 %Val)

; llvm.powi with an fp128 base and an i32 exponent loaded from %b. Both runs
; expect a call to __powikf2.
define dso_local void @qp_powi(fp128* nocapture readonly %a, i32* nocapture readonly %b,
; CHECK-LABEL: qp_powi:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: mr r30, r5
; CHECK-NEXT: lwz r5, 0(r4)
; CHECK-NEXT: bl __powikf2
; CHECK-NEXT: nop
; CHECK-NEXT: stxv v2, 0(r30)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_powi:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lwz r3, 0(r4)
; CHECK-P8-NEXT: mr r30, r5
; CHECK-P8-NEXT: mr r5, r3
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl __powikf2
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
                              fp128* nocapture %res) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load i32, i32* %b, align 8
  %2 = tail call fp128 @llvm.powi.f128.i32(fp128 %0, i32 %1)
  store fp128 %2, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.powi.f128.i32(fp128 %Val, i32 %power)

; Zero-initialised fp128 globals read by qp_frem below.
@a = common dso_local global fp128 0xL00000000000000000000000000000000, align 16
@b = common dso_local global fp128 0xL00000000000000000000000000000000, align 16

; frem of the two globals @a and @b. Both runs expect a call to fmodf128.
define fp128 @qp_frem() #0 {
; CHECK-LABEL: qp_frem:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -32(r1)
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: addis r3, r2, a@toc@ha
; CHECK-NEXT: addi r3, r3, a@toc@l
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: addis r3, r2, b@toc@ha
; CHECK-NEXT: addi r3, r3, b@toc@l
; CHECK-NEXT: lxv v3, 0(r3)
; CHECK-NEXT: bl fmodf128
; CHECK-NEXT: nop
; CHECK-NEXT: addi r1, r1, 32
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qp_frem:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -32(r1)
; CHECK-P8-NEXT: .cfi_def_cfa_offset 32
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: addis r3, r2, a@toc@ha
; CHECK-P8-NEXT: addis r4, r2, b@toc@ha
; CHECK-P8-NEXT: addi r3, r3, a@toc@l
; CHECK-P8-NEXT: addi r4, r4, b@toc@l
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: lxvd2x vs1, 0, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: xxswapd v3, vs1
; CHECK-P8-NEXT: bl fmodf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: addi r1, r1, 32
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* @a, align 16
  %1 = load fp128, fp128* @b, align 16
  %rem = frem fp128 %0, %1
  ret fp128 %rem
}

; llvm.ceil on fp128. The pwr9 run expects "xsrqpi 1, v2, v2, 2"; the pwr8 run
; expects a call to ceilf128.
define dso_local void @qpCeil(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpCeil:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsrqpi 1, v2, v2, 2
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpCeil:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl ceilf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.ceil.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.ceil.f128(fp128 %Val)

; llvm.floor on fp128. The pwr9 run expects "xsrqpi 1, v2, v2, 3"; the pwr8 run
; expects a call to floorf128.
define dso_local void @qpFloor(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpFloor:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsrqpi 1, v2, v2, 3
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpFloor:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl floorf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.floor.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.floor.f128(fp128 %Val)

; llvm.trunc on fp128. The pwr9 run expects "xsrqpi 1, v2, v2, 1"; the pwr8 run
; expects a call to truncf128.
define dso_local void @qpTrunc(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpTrunc:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsrqpi 1, v2, v2, 1
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpTrunc:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl truncf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.trunc.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.trunc.f128(fp128 %Val)

; llvm.round on fp128. The pwr9 run expects "xsrqpi 0, v2, v2, 0"; the pwr8 run
; expects a call to roundf128.
define dso_local void @qpRound(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpRound:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsrqpi 0, v2, v2, 0
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpRound:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl roundf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: xxswapd vs0, v2
; CHECK-P8-NEXT: stxvd2x vs0, 0, r30
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.round.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.round.f128(fp128 %Val)

; llvm.lround from fp128 to i32. Both runs expect a call to lroundf128 followed
; by a word store (stw) of the result.
define dso_local void @qpLRound(fp128* nocapture readonly %a, i32* nocapture %res) {
; CHECK-LABEL: qpLRound:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: mr r30, r4
; CHECK-NEXT: bl lroundf128
; CHECK-NEXT: nop
; CHECK-NEXT: stw r3, 0(r30)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpLRound:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl lroundf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: stw r3, 0(r30)
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call i32 @llvm.lround.f128(fp128 %0)
  store i32 %1, i32* %res, align 16
  ret void
}
declare i32 @llvm.lround.f128(fp128 %Val)

; llvm.llround from fp128 to i64. Both runs expect a call to llroundf128
; followed by a doubleword store (std) of the result.
define dso_local void @qpLLRound(fp128* nocapture readonly %a, i64* nocapture %res) {
; CHECK-LABEL: qpLLRound:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: mflr r0
; CHECK-NEXT: .cfi_def_cfa_offset 48
; CHECK-NEXT: .cfi_offset lr, 16
; CHECK-NEXT: .cfi_offset r30, -16
; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT: std r0, 16(r1)
; CHECK-NEXT: stdu r1, -48(r1)
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: mr r30, r4
; CHECK-NEXT: bl llroundf128
; CHECK-NEXT: nop
; CHECK-NEXT: std r3, 0(r30)
; CHECK-NEXT: addi r1, r1, 48
; CHECK-NEXT: ld r0, 16(r1)
; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT: mtlr r0
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpLLRound:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT: .cfi_offset lr, 16
; CHECK-P8-NEXT: .cfi_offset r30, -16
; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT: std r0, 16(r1)
; CHECK-P8-NEXT: stdu r1, -48(r1)
; CHECK-P8-NEXT: lxvd2x vs0, 0, r3
; CHECK-P8-NEXT: mr r30, r4
; CHECK-P8-NEXT: xxswapd v2, vs0
; CHECK-P8-NEXT: bl llroundf128
; CHECK-P8-NEXT: nop
; CHECK-P8-NEXT: std r3, 0(r30)
; CHECK-P8-NEXT: addi r1, r1, 48
; CHECK-P8-NEXT: ld r0, 16(r1)
; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT: mtlr r0
; CHECK-P8-NEXT: blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call i64 @llvm.llround.f128(fp128 %0)
  store i64 %1, i64* %res, align 16
  ret void
}
declare i64 @llvm.llround.f128(fp128 %Val)

define dso_local void @qpRint(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpRint:
; CHECK: # %bb.0: # %entry
; CHECK-NEXT: lxv v2, 0(r3)
; CHECK-NEXT: xsrqpix 0, v2, v2, 3
; CHECK-NEXT: stxv v2, 0(r4)
; CHECK-NEXT: blr
;
; CHECK-P8-LABEL: qpRint:
; CHECK-P8: # %bb.0: # %entry
; CHECK-P8-NEXT: mflr r0
; CHECK-P8-NEXT: .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:
.cfi_offset lr, 16 1110; CHECK-P8-NEXT: .cfi_offset r30, -16 1111; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill 1112; CHECK-P8-NEXT: std r0, 16(r1) 1113; CHECK-P8-NEXT: stdu r1, -48(r1) 1114; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 1115; CHECK-P8-NEXT: mr r30, r4 1116; CHECK-P8-NEXT: xxswapd v2, vs0 1117; CHECK-P8-NEXT: bl rintf128 1118; CHECK-P8-NEXT: nop 1119; CHECK-P8-NEXT: xxswapd vs0, v2 1120; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 1121; CHECK-P8-NEXT: addi r1, r1, 48 1122; CHECK-P8-NEXT: ld r0, 16(r1) 1123; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload 1124; CHECK-P8-NEXT: mtlr r0 1125; CHECK-P8-NEXT: blr 1126entry: 1127 %0 = load fp128, fp128* %a, align 16 1128 %1 = tail call fp128 @llvm.rint.f128(fp128 %0) 1129 store fp128 %1, fp128* %res, align 16 1130 ret void 1131} 1132declare fp128 @llvm.rint.f128(fp128 %Val) 1133 1134define dso_local void @qpLRint(fp128* nocapture readonly %a, i32* nocapture %res) { 1135; CHECK-LABEL: qpLRint: 1136; CHECK: # %bb.0: # %entry 1137; CHECK-NEXT: mflr r0 1138; CHECK-NEXT: .cfi_def_cfa_offset 48 1139; CHECK-NEXT: .cfi_offset lr, 16 1140; CHECK-NEXT: .cfi_offset r30, -16 1141; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill 1142; CHECK-NEXT: std r0, 16(r1) 1143; CHECK-NEXT: stdu r1, -48(r1) 1144; CHECK-NEXT: lxv v2, 0(r3) 1145; CHECK-NEXT: mr r30, r4 1146; CHECK-NEXT: bl lrintf128 1147; CHECK-NEXT: nop 1148; CHECK-NEXT: stw r3, 0(r30) 1149; CHECK-NEXT: addi r1, r1, 48 1150; CHECK-NEXT: ld r0, 16(r1) 1151; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload 1152; CHECK-NEXT: mtlr r0 1153; CHECK-NEXT: blr 1154; 1155; CHECK-P8-LABEL: qpLRint: 1156; CHECK-P8: # %bb.0: # %entry 1157; CHECK-P8-NEXT: mflr r0 1158; CHECK-P8-NEXT: .cfi_def_cfa_offset 48 1159; CHECK-P8-NEXT: .cfi_offset lr, 16 1160; CHECK-P8-NEXT: .cfi_offset r30, -16 1161; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill 1162; CHECK-P8-NEXT: std r0, 16(r1) 1163; CHECK-P8-NEXT: stdu r1, -48(r1) 1164; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 1165; CHECK-P8-NEXT: mr 
r30, r4 1166; CHECK-P8-NEXT: xxswapd v2, vs0 1167; CHECK-P8-NEXT: bl lrintf128 1168; CHECK-P8-NEXT: nop 1169; CHECK-P8-NEXT: stw r3, 0(r30) 1170; CHECK-P8-NEXT: addi r1, r1, 48 1171; CHECK-P8-NEXT: ld r0, 16(r1) 1172; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload 1173; CHECK-P8-NEXT: mtlr r0 1174; CHECK-P8-NEXT: blr 1175entry: 1176 %0 = load fp128, fp128* %a, align 16 1177 %1 = tail call i32 @llvm.lrint.f128(fp128 %0) 1178 store i32 %1, i32* %res, align 16 1179 ret void 1180} 1181declare i32 @llvm.lrint.f128(fp128 %Val) 1182 1183define dso_local void @qpLLRint(fp128* nocapture readonly %a, i64* nocapture %res) { 1184; CHECK-LABEL: qpLLRint: 1185; CHECK: # %bb.0: # %entry 1186; CHECK-NEXT: mflr r0 1187; CHECK-NEXT: .cfi_def_cfa_offset 48 1188; CHECK-NEXT: .cfi_offset lr, 16 1189; CHECK-NEXT: .cfi_offset r30, -16 1190; CHECK-NEXT: std r30, -16(r1) # 8-byte Folded Spill 1191; CHECK-NEXT: std r0, 16(r1) 1192; CHECK-NEXT: stdu r1, -48(r1) 1193; CHECK-NEXT: lxv v2, 0(r3) 1194; CHECK-NEXT: mr r30, r4 1195; CHECK-NEXT: bl llrintf128 1196; CHECK-NEXT: nop 1197; CHECK-NEXT: std r3, 0(r30) 1198; CHECK-NEXT: addi r1, r1, 48 1199; CHECK-NEXT: ld r0, 16(r1) 1200; CHECK-NEXT: ld r30, -16(r1) # 8-byte Folded Reload 1201; CHECK-NEXT: mtlr r0 1202; CHECK-NEXT: blr 1203; 1204; CHECK-P8-LABEL: qpLLRint: 1205; CHECK-P8: # %bb.0: # %entry 1206; CHECK-P8-NEXT: mflr r0 1207; CHECK-P8-NEXT: .cfi_def_cfa_offset 48 1208; CHECK-P8-NEXT: .cfi_offset lr, 16 1209; CHECK-P8-NEXT: .cfi_offset r30, -16 1210; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill 1211; CHECK-P8-NEXT: std r0, 16(r1) 1212; CHECK-P8-NEXT: stdu r1, -48(r1) 1213; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 1214; CHECK-P8-NEXT: mr r30, r4 1215; CHECK-P8-NEXT: xxswapd v2, vs0 1216; CHECK-P8-NEXT: bl llrintf128 1217; CHECK-P8-NEXT: nop 1218; CHECK-P8-NEXT: std r3, 0(r30) 1219; CHECK-P8-NEXT: addi r1, r1, 48 1220; CHECK-P8-NEXT: ld r0, 16(r1) 1221; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload 1222; CHECK-P8-NEXT: mtlr 
r0 1223; CHECK-P8-NEXT: blr 1224entry: 1225 %0 = load fp128, fp128* %a, align 16 1226 %1 = tail call i64 @llvm.llrint.f128(fp128 %0) 1227 store i64 %1, i64* %res, align 16 1228 ret void 1229} 1230declare i64 @llvm.llrint.f128(fp128 %Val) 1231 1232define dso_local void @qpNearByInt(fp128* nocapture readonly %a, fp128* nocapture %res) { 1233; CHECK-LABEL: qpNearByInt: 1234; CHECK: # %bb.0: # %entry 1235; CHECK-NEXT: lxv v2, 0(r3) 1236; CHECK-NEXT: xsrqpi 0, v2, v2, 3 1237; CHECK-NEXT: stxv v2, 0(r4) 1238; CHECK-NEXT: blr 1239; 1240; CHECK-P8-LABEL: qpNearByInt: 1241; CHECK-P8: # %bb.0: # %entry 1242; CHECK-P8-NEXT: mflr r0 1243; CHECK-P8-NEXT: .cfi_def_cfa_offset 48 1244; CHECK-P8-NEXT: .cfi_offset lr, 16 1245; CHECK-P8-NEXT: .cfi_offset r30, -16 1246; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill 1247; CHECK-P8-NEXT: std r0, 16(r1) 1248; CHECK-P8-NEXT: stdu r1, -48(r1) 1249; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 1250; CHECK-P8-NEXT: mr r30, r4 1251; CHECK-P8-NEXT: xxswapd v2, vs0 1252; CHECK-P8-NEXT: bl nearbyintf128 1253; CHECK-P8-NEXT: nop 1254; CHECK-P8-NEXT: xxswapd vs0, v2 1255; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 1256; CHECK-P8-NEXT: addi r1, r1, 48 1257; CHECK-P8-NEXT: ld r0, 16(r1) 1258; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload 1259; CHECK-P8-NEXT: mtlr r0 1260; CHECK-P8-NEXT: blr 1261entry: 1262 %0 = load fp128, fp128* %a, align 16 1263 %1 = tail call fp128 @llvm.nearbyint.f128(fp128 %0) 1264 store fp128 %1, fp128* %res, align 16 1265 ret void 1266} 1267declare fp128 @llvm.nearbyint.f128(fp128 %Val) 1268 1269define dso_local void @qpFMA(fp128* %a, fp128* %b, fp128* %c, fp128* %res) { 1270; CHECK-LABEL: qpFMA: 1271; CHECK: # %bb.0: # %entry 1272; CHECK-NEXT: lxv v2, 0(r3) 1273; CHECK-NEXT: lxv v3, 0(r4) 1274; CHECK-NEXT: lxv v4, 0(r5) 1275; CHECK-NEXT: xsmaddqp v4, v2, v3 1276; CHECK-NEXT: stxv v4, 0(r6) 1277; CHECK-NEXT: blr 1278; 1279; CHECK-P8-LABEL: qpFMA: 1280; CHECK-P8: # %bb.0: # %entry 1281; CHECK-P8-NEXT: mflr r0 1282; CHECK-P8-NEXT: 
.cfi_def_cfa_offset 48 1283; CHECK-P8-NEXT: .cfi_offset lr, 16 1284; CHECK-P8-NEXT: .cfi_offset r30, -16 1285; CHECK-P8-NEXT: std r30, -16(r1) # 8-byte Folded Spill 1286; CHECK-P8-NEXT: std r0, 16(r1) 1287; CHECK-P8-NEXT: stdu r1, -48(r1) 1288; CHECK-P8-NEXT: lxvd2x vs0, 0, r3 1289; CHECK-P8-NEXT: lxvd2x vs1, 0, r4 1290; CHECK-P8-NEXT: mr r30, r6 1291; CHECK-P8-NEXT: lxvd2x vs2, 0, r5 1292; CHECK-P8-NEXT: xxswapd v2, vs0 1293; CHECK-P8-NEXT: xxswapd v3, vs1 1294; CHECK-P8-NEXT: xxswapd v4, vs2 1295; CHECK-P8-NEXT: bl fmaf128 1296; CHECK-P8-NEXT: nop 1297; CHECK-P8-NEXT: xxswapd vs0, v2 1298; CHECK-P8-NEXT: stxvd2x vs0, 0, r30 1299; CHECK-P8-NEXT: addi r1, r1, 48 1300; CHECK-P8-NEXT: ld r0, 16(r1) 1301; CHECK-P8-NEXT: ld r30, -16(r1) # 8-byte Folded Reload 1302; CHECK-P8-NEXT: mtlr r0 1303; CHECK-P8-NEXT: blr 1304entry: 1305 %0 = load fp128, fp128* %a, align 16 1306 %1 = load fp128, fp128* %b, align 16 1307 %2 = load fp128, fp128* %c, align 16 1308 %3 = tail call fp128 @llvm.fma.f128(fp128 %0, fp128 %1, fp128 %2) 1309 store fp128 %3, fp128* %res, align 16 1310 ret void 1311} 1312declare fp128 @llvm.fma.f128(fp128, fp128, fp128) 1313