; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mcpu=pwr9 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown -verify-machineinstrs \
; RUN:   -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | FileCheck %s \
; RUN:   -check-prefix=CHECK-P8

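; This file checks codegen for fp128 (IEEE quadruple-precision) operations on
; powerpc64le: with -mcpu=pwr9 they lower to native quad-precision VSX
; instructions, while with -mcpu=pwr8 (CHECK-P8) they are expanded to runtime
; library calls (e.g. __addkf3, sqrtf128) or to in-memory sign manipulation.
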
; Function Attrs: norecurse nounwind
define dso_local void @qpAdd(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpAdd:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsaddqp v2, v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpAdd:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    vmr v3, v2
; CHECK-P8-NEXT:    bl __addkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %add = fadd fp128 %0, %0
  store fp128 %add, fp128* %res, align 16
  ret void
}

; Function Attrs: norecurse nounwind
define dso_local void @qpSub(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpSub:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xssubqp v2, v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpSub:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    vmr v3, v2
; CHECK-P8-NEXT:    bl __subkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %sub = fsub fp128 %0, %0
  store fp128 %sub, fp128* %res, align 16
  ret void
}

; Function Attrs: norecurse nounwind
define dso_local void @qpMul(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpMul:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsmulqp v2, v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpMul:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    vmr v3, v2
; CHECK-P8-NEXT:    bl __mulkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %mul = fmul fp128 %0, %0
  store fp128 %mul, fp128* %res, align 16
  ret void
}

; Function Attrs: norecurse nounwind
define dso_local void @qpDiv(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpDiv:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsdivqp v2, v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpDiv:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    vmr v3, v2
; CHECK-P8-NEXT:    bl __divkf3
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %div = fdiv fp128 %0, %0
  store fp128 %div, fp128* %res, align 16
  ret void
}

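; Check that an fp128 load/store through offset pointers uses the indexed
; lxvx/stxvx forms on pwr9 and lxvd2x/stxvd2x plus an explicit addi on pwr8.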
define dso_local void @testLdNSt(i8* nocapture readonly %PtrC, fp128* nocapture %PtrF) {
; CHECK-LABEL: testLdNSt:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li r5, 4
; CHECK-NEXT:    lxvx vs0, r3, r5
; CHECK-NEXT:    li r3, 8
; CHECK-NEXT:    stxvx vs0, r4, r3
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: testLdNSt:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    addi r3, r3, 4
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    addi r3, r4, 8
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    blr
entry:
  %add.ptr = getelementptr inbounds i8, i8* %PtrC, i64 4
  %0 = bitcast i8* %add.ptr to fp128*
  %1 = load fp128, fp128* %0, align 16
  %2 = bitcast fp128* %PtrF to i8*
  %add.ptr1 = getelementptr inbounds i8, i8* %2, i64 8
  %3 = bitcast i8* %add.ptr1 to fp128*
  store fp128 %1, fp128* %3, align 16
  ret void
}

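; llvm.sqrt.f128 maps to xssqrtqp on pwr9 and to a sqrtf128 call on pwr8.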
define dso_local void @qpSqrt(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpSqrt:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xssqrtqp v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpSqrt:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl sqrtf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.sqrt.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void

}
declare fp128 @llvm.sqrt.f128(fp128 %Val)

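; llvm.copysign.f128 maps to xscpsgnqp on pwr9; pwr8 copies the sign bit
; through the stack with byte loads/stores.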
define dso_local void @qpCpsgn(fp128* nocapture readonly %a, fp128* nocapture readonly %b,
; CHECK-LABEL: qpCpsgn:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    xscpsgnqp v2, v3, v2
; CHECK-NEXT:    stxv v2, 0(r5)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpCpsgn:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    addi r4, r1, -16
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    addi r3, r1, -32
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lbz r4, -1(r1)
; CHECK-P8-NEXT:    lbz r6, -17(r1)
; CHECK-P8-NEXT:    rlwimi r6, r4, 0, 0, 24
; CHECK-P8-NEXT:    stb r6, -17(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r5
; CHECK-P8-NEXT:    blr
                     fp128* nocapture %res) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.copysign.f128(fp128 %0, fp128 %1)
  store fp128 %2, fp128* %res, align 16
  ret void

}
declare fp128 @llvm.copysign.f128(fp128 %Mag, fp128 %Sgn)

define dso_local void @qpAbs(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpAbs:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsabsqp v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpAbs:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    addi r3, r1, -16
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lbz r5, -1(r1)
; CHECK-P8-NEXT:    clrlwi r5, r5, 25
; CHECK-P8-NEXT:    stb r5, -1(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void

}
declare fp128 @llvm.fabs.f128(fp128 %Val)

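; Negation and negated absolute value have no libcall: pwr9 uses
; xsnabsqp/xsnegqp, pwr8 flips or clears the sign bit in memory.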
define dso_local void @qpNAbs(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpNAbs:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsnabsqp v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpNAbs:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    addi r3, r1, -32
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lbz r5, -17(r1)
; CHECK-P8-NEXT:    clrlwi r5, r5, 25
; CHECK-P8-NEXT:    stb r5, -17(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    addi r3, r1, -16
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lbz r5, -1(r1)
; CHECK-P8-NEXT:    xori r5, r5, 128
; CHECK-P8-NEXT:    stb r5, -1(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.fabs.f128(fp128 %0)
  %neg = fsub fp128 0xL00000000000000008000000000000000, %1
  store fp128 %neg, fp128* %res, align 16
  ret void

}

define dso_local void @qpNeg(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpNeg:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsnegqp v2, v2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpNeg:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    addi r3, r1, -16
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lbz r5, -1(r1)
; CHECK-P8-NEXT:    xori r5, r5, 128
; CHECK-P8-NEXT:    stb r5, -1(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %sub = fsub fp128 0xL00000000000000008000000000000000, %0
  store fp128 %sub, fp128* %res, align 16
  ret void

}

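; The transcendental intrinsics (sin, cos, log, log10, log2, exp, exp2) have no
; dedicated pwr9 instructions, so both subtargets call the corresponding *f128
; library routines; only the argument setup differs.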
define fp128 @qp_sin(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_sin:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl sinf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_sin:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl sinf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.sin.f128(fp128 %0)
  ret fp128 %1
}
declare fp128 @llvm.sin.f128(fp128 %Val)

define fp128 @qp_cos(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_cos:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl cosf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_cos:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl cosf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.cos.f128(fp128 %0)
  ret fp128 %1
}
declare fp128 @llvm.cos.f128(fp128 %Val)

define fp128 @qp_log(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_log:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl logf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_log:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl logf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.log.f128(fp128 %0)
  ret fp128 %1
}
declare fp128     @llvm.log.f128(fp128 %Val)

define fp128 @qp_log10(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_log10:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl log10f128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_log10:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl log10f128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.log10.f128(fp128 %0)
  ret fp128 %1
}
declare fp128     @llvm.log10.f128(fp128 %Val)

define fp128 @qp_log2(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_log2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl log2f128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_log2:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl log2f128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.log2.f128(fp128 %0)
  ret fp128 %1
}
declare fp128     @llvm.log2.f128(fp128 %Val)

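; The two-operand minnum/maxnum/pow intrinsics also have no pwr9 instruction
; and lower to fminf128/fmaxf128/powf128 on both subtargets.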
define fp128 @qp_minnum(fp128* nocapture readonly %a,
; CHECK-LABEL: qp_minnum:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    bl fminf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_minnum:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    xxswapd v3, vs1
; CHECK-P8-NEXT:    bl fminf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                        fp128* nocapture readonly %b) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.minnum.f128(fp128 %0, fp128 %1)
  ret fp128 %2
}
declare fp128     @llvm.minnum.f128(fp128 %Val0, fp128 %Val1)

define fp128 @qp_maxnum(fp128* nocapture readonly %a,
; CHECK-LABEL: qp_maxnum:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    bl fmaxf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_maxnum:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    xxswapd v3, vs1
; CHECK-P8-NEXT:    bl fmaxf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                        fp128* nocapture readonly %b) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.maxnum.f128(fp128 %0, fp128 %1)
  ret fp128 %2
}
declare fp128     @llvm.maxnum.f128(fp128 %Val0, fp128 %Val1)

define fp128 @qp_pow(fp128* nocapture readonly %a,
; CHECK-LABEL: qp_pow:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    bl powf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_pow:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    xxswapd v3, vs1
; CHECK-P8-NEXT:    bl powf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                     fp128* nocapture readonly %b) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = tail call fp128 @llvm.pow.f128(fp128 %0, fp128 %1)
  ret fp128 %2
}
declare fp128 @llvm.pow.f128(fp128 %Val, fp128 %Power)

define fp128 @qp_exp(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_exp:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl expf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_exp:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl expf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.exp.f128(fp128 %0)
  ret fp128 %1
}
declare fp128     @llvm.exp.f128(fp128 %Val)

define fp128 @qp_exp2(fp128* nocapture readonly %a) {
; CHECK-LABEL: qp_exp2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    bl exp2f128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_exp2:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl exp2f128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.exp2.f128(fp128 %0)
  ret fp128 %1
}
declare fp128     @llvm.exp2.f128(fp128 %Val)

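; llvm.powi.f128.i32 lowers to a __powikf2 call on both subtargets; the i32
; exponent is loaded from memory and passed in r5.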
define dso_local void @qp_powi(fp128* nocapture readonly %a, i32* nocapture readonly %b,
; CHECK-LABEL: qp_powi:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    .cfi_offset r30, -16
; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -48(r1)
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    mr r30, r5
; CHECK-NEXT:    lwz r5, 0(r4)
; CHECK-NEXT:    bl __powikf2
; CHECK-NEXT:    nop
; CHECK-NEXT:    stxv v2, 0(r30)
; CHECK-NEXT:    addi r1, r1, 48
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_powi:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lwz r3, 0(r4)
; CHECK-P8-NEXT:    mr r30, r5
; CHECK-P8-NEXT:    mr r5, r3
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl __powikf2
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
                     fp128* nocapture %res) {
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load i32, i32* %b, align 8
  %2 = tail call fp128 @llvm.powi.f128.i32(fp128 %0, i32 %1)
  store fp128 %2, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.powi.f128.i32(fp128 %Val, i32 %power)

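; frem has no instruction or intrinsic form and is lowered to an fmodf128 call
; on both subtargets; the operands here come from TOC-relative globals.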
@a = common dso_local global fp128 0xL00000000000000000000000000000000, align 16
@b = common dso_local global fp128 0xL00000000000000000000000000000000, align 16

define fp128 @qp_frem() #0 {
; CHECK-LABEL: qp_frem:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -32(r1)
; CHECK-NEXT:    .cfi_def_cfa_offset 32
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    addis r3, r2, a@toc@ha
; CHECK-NEXT:    addi r3, r3, a@toc@l
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    addis r3, r2, b@toc@ha
; CHECK-NEXT:    addi r3, r3, b@toc@l
; CHECK-NEXT:    lxv v3, 0(r3)
; CHECK-NEXT:    bl fmodf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    addi r1, r1, 32
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qp_frem:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -32(r1)
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 32
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    addis r3, r2, a@toc@ha
; CHECK-P8-NEXT:    addis r4, r2, b@toc@ha
; CHECK-P8-NEXT:    addi r3, r3, a@toc@l
; CHECK-P8-NEXT:    addi r4, r4, b@toc@l
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    xxswapd v3, vs1
; CHECK-P8-NEXT:    bl fmodf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    addi r1, r1, 32
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* @a, align 16
  %1 = load fp128, fp128* @b, align 16
  %rem = frem fp128 %0, %1
  ret fp128 %rem
}

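; ceil/floor/trunc/round map to xsrqpi with the appropriate rounding-mode
; immediate on pwr9 and to the corresponding *f128 library calls on pwr8.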
define dso_local void @qpCeil(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpCeil:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsrqpi 1, v2, v2, 2
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpCeil:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl ceilf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.ceil.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.ceil.f128(fp128 %Val)

define dso_local void @qpFloor(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpFloor:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsrqpi 1, v2, v2, 3
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpFloor:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl floorf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.floor.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.floor.f128(fp128 %Val)

define dso_local void @qpTrunc(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpTrunc:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsrqpi 1, v2, v2, 1
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpTrunc:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl truncf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.trunc.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.trunc.f128(fp128 %Val)

define dso_local void @qpRound(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpRound:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsrqpi 0, v2, v2, 0
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpRound:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl roundf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.round.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.round.f128(fp128 %Val)

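; lround/llround always go through the library on both subtargets; the integer
; result is returned in r3 and stored directly.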
define dso_local void @qpLRound(fp128* nocapture readonly %a, i32* nocapture %res) {
; CHECK-LABEL: qpLRound:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    .cfi_offset r30, -16
; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -48(r1)
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    mr r30, r4
; CHECK-NEXT:    bl lroundf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    stw r3, 0(r30)
; CHECK-NEXT:    addi r1, r1, 48
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpLRound:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl lroundf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    stw r3, 0(r30)
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call i32 @llvm.lround.f128(fp128 %0)
  store i32 %1, i32* %res, align 16
  ret void
}
declare i32 @llvm.lround.f128(fp128 %Val)

define dso_local void @qpLLRound(fp128* nocapture readonly %a, i64* nocapture %res) {
; CHECK-LABEL: qpLLRound:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    .cfi_offset r30, -16
; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -48(r1)
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    mr r30, r4
; CHECK-NEXT:    bl llroundf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    std r3, 0(r30)
; CHECK-NEXT:    addi r1, r1, 48
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpLLRound:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl llroundf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    std r3, 0(r30)
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call i64 @llvm.llround.f128(fp128 %0)
  store i64 %1, i64* %res, align 16
  ret void
}
declare i64 @llvm.llround.f128(fp128 %Val)

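; rint maps to xsrqpix and nearbyint to xsrqpi on pwr9; rint, nearbyint,
; lrint and llrint all become library calls on pwr8.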
define dso_local void @qpRint(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpRint:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsrqpix 0, v2, v2, 3
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpRint:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl rintf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.rint.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.rint.f128(fp128 %Val)

define dso_local void @qpLRint(fp128* nocapture readonly %a, i32* nocapture %res) {
; CHECK-LABEL: qpLRint:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    .cfi_offset r30, -16
; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -48(r1)
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    mr r30, r4
; CHECK-NEXT:    bl lrintf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    stw r3, 0(r30)
; CHECK-NEXT:    addi r1, r1, 48
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpLRint:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl lrintf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    stw r3, 0(r30)
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call i32 @llvm.lrint.f128(fp128 %0)
  store i32 %1, i32* %res, align 16
  ret void
}
declare i32 @llvm.lrint.f128(fp128 %Val)

define dso_local void @qpLLRint(fp128* nocapture readonly %a, i64* nocapture %res) {
; CHECK-LABEL: qpLLRint:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    mflr r0
; CHECK-NEXT:    .cfi_def_cfa_offset 48
; CHECK-NEXT:    .cfi_offset lr, 16
; CHECK-NEXT:    .cfi_offset r30, -16
; CHECK-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-NEXT:    std r0, 16(r1)
; CHECK-NEXT:    stdu r1, -48(r1)
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    mr r30, r4
; CHECK-NEXT:    bl llrintf128
; CHECK-NEXT:    nop
; CHECK-NEXT:    std r3, 0(r30)
; CHECK-NEXT:    addi r1, r1, 48
; CHECK-NEXT:    ld r0, 16(r1)
; CHECK-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-NEXT:    mtlr r0
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpLLRint:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl llrintf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    std r3, 0(r30)
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call i64 @llvm.llrint.f128(fp128 %0)
  store i64 %1, i64* %res, align 16
  ret void
}
declare i64 @llvm.llrint.f128(fp128 %Val)

define dso_local void @qpNearByInt(fp128* nocapture readonly %a, fp128* nocapture %res) {
; CHECK-LABEL: qpNearByInt:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    xsrqpi 0, v2, v2, 3
; CHECK-NEXT:    stxv v2, 0(r4)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpNearByInt:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    mr r30, r4
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    bl nearbyintf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = tail call fp128 @llvm.nearbyint.f128(fp128 %0)
  store fp128 %1, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.nearbyint.f128(fp128 %Val)

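; llvm.fma.f128 maps to the fused xsmaddqp on pwr9 and to fmaf128 on pwr8.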
define dso_local void @qpFMA(fp128* %a, fp128* %b, fp128* %c, fp128* %res) {
; CHECK-LABEL: qpFMA:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lxv v2, 0(r3)
; CHECK-NEXT:    lxv v3, 0(r4)
; CHECK-NEXT:    lxv v4, 0(r5)
; CHECK-NEXT:    xsmaddqp v4, v2, v3
; CHECK-NEXT:    stxv v4, 0(r6)
; CHECK-NEXT:    blr
;
; CHECK-P8-LABEL: qpFMA:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    mflr r0
; CHECK-P8-NEXT:    .cfi_def_cfa_offset 48
; CHECK-P8-NEXT:    .cfi_offset lr, 16
; CHECK-P8-NEXT:    .cfi_offset r30, -16
; CHECK-P8-NEXT:    std r30, -16(r1) # 8-byte Folded Spill
; CHECK-P8-NEXT:    std r0, 16(r1)
; CHECK-P8-NEXT:    stdu r1, -48(r1)
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    lxvd2x vs1, 0, r4
; CHECK-P8-NEXT:    mr r30, r6
; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    xxswapd v3, vs1
; CHECK-P8-NEXT:    xxswapd v4, vs2
; CHECK-P8-NEXT:    bl fmaf128
; CHECK-P8-NEXT:    nop
; CHECK-P8-NEXT:    xxswapd vs0, v2
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r30
; CHECK-P8-NEXT:    addi r1, r1, 48
; CHECK-P8-NEXT:    ld r0, 16(r1)
; CHECK-P8-NEXT:    ld r30, -16(r1) # 8-byte Folded Reload
; CHECK-P8-NEXT:    mtlr r0
; CHECK-P8-NEXT:    blr
entry:
  %0 = load fp128, fp128* %a, align 16
  %1 = load fp128, fp128* %b, align 16
  %2 = load fp128, fp128* %c, align 16
  %3 = tail call fp128 @llvm.fma.f128(fp128 %0, fp128 %1, fp128 %2)
  store fp128 %3, fp128* %res, align 16
  ret void
}
declare fp128 @llvm.fma.f128(fp128, fp128, fp128)
