; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-P9

; Byte indexed

; Inserts the low 8 bits of %b into <16 x i8> %a at the run-time index %idx.
; The pwr10 runs expect a single vinsbrx (LE) / vinsblx (BE); the pwr9 run
; expects the vector to be stored to the stack, patched with stbx and reloaded.
define <16 x i8> @testByte(<16 x i8> %a, i64 %b, i64 %idx) {
; CHECK-LABEL: testByte:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vinsbrx v2, r6, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testByte:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vinsblx v2, r6, r5
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testByte:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    addi r4, r1, -16
; CHECK-P9-NEXT:    clrldi r3, r6, 60
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    stbx r5, r4, r3
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
entry:
  %conv = trunc i64 %b to i8
  %vecins = insertelement <16 x i8> %a, i8 %conv, i64 %idx
  ret <16 x i8> %vecins
}

; Halfword indexed

; Inserts the low 16 bits of %b into <8 x i16> %a at the run-time index %idx.
; The pwr10 runs expect the index to be shifted left by 1 (slwi) and fed to
; vinshrx (LE) / vinshlx (BE); the pwr9 run expects a stack round trip with sthx.
define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) {
; CHECK-LABEL: testHalf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    slwi r3, r6, 1
; CHECK-NEXT:    vinshrx v2, r3, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testHalf:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    slwi r3, r6, 1
; CHECK-BE-NEXT:    vinshlx v2, r3, r5
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testHalf:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    addi r4, r1, -16
; CHECK-P9-NEXT:    rlwinm r3, r6, 1, 28, 30
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    sthx r5, r4, r3
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
entry:
  %conv = trunc i64 %b to i16
  %vecins = insertelement <8 x i16> %a, i16 %conv, i64 %idx
  ret <8 x i16> %vecins
}

; Word indexed

; Inserts the low 32 bits of %b into <4 x i32> %a at the run-time index %idx.
; The pwr10 runs expect the index to be shifted left by 2 (slwi) and fed to
; vinswrx (LE) / vinswlx (BE); the pwr9 run expects a stack round trip with stwx.
define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) {
; CHECK-LABEL: testWord:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    slwi r3, r6, 2
; CHECK-NEXT:    vinswrx v2, r3, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testWord:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    slwi r3, r6, 2
; CHECK-BE-NEXT:    vinswlx v2, r3, r5
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testWord:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    addi r4, r1, -16
; CHECK-P9-NEXT:    rlwinm r3, r6, 2, 28, 29
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    stwx r5, r4, r3
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
entry:
  %conv = trunc i64 %b to i32
  %vecins = insertelement <4 x i32> %a, i32 %conv, i64 %idx
  ret <4 x i32> %vecins
}

; Word immediate

; Inserts the low 32 bits of %b into <4 x i32> %a at the constant elements 1
; and 3. The pwr10 runs expect two vinsw instructions (immediates 8 and 0 on LE,
; 4 and 12 on BE); the pwr9 run expects mtfprwz followed by two xxinsertw.
define <4 x i32> @testWordImm(<4 x i32> %a, i64 %b) {
; CHECK-LABEL: testWordImm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vinsw v2, r5, 8
; CHECK-NEXT:    vinsw v2, r5, 0
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testWordImm:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vinsw v2, r5, 4
; CHECK-BE-NEXT:    vinsw v2, r5, 12
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testWordImm:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    mtfprwz f0, r5
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 4
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 12
; CHECK-P9-NEXT:    blr
entry:
  %conv = trunc i64 %b to i32
  %vecins = insertelement <4 x i32> %a, i32 %conv, i32 1
  %vecins2 = insertelement <4 x i32> %vecins, i32 %conv, i32 3
  ret <4 x i32> %vecins2
}

; Doubleword indexed

; Inserts the i64 %b into <2 x i64> %a at the run-time index %idx.
; The pwr10 runs expect the index to be shifted left by 3 (rlwinm) and fed to
; vinsdrx (LE) / vinsdlx (BE); the pwr9 run expects a stack round trip with stdx.
define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
; CHECK-LABEL: testDoubleword:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    rlwinm r3, r6, 3, 0, 28
; CHECK-NEXT:    vinsdrx v2, r3, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDoubleword:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    rlwinm r3, r6, 3, 0, 28
; CHECK-BE-NEXT:    vinsdlx v2, r3, r5
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDoubleword:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    addi r4, r1, -16
; CHECK-P9-NEXT:    rlwinm r3, r6, 3, 28, 28
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    stdx r5, r4, r3
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
entry:
  %vecins = insertelement <2 x i64> %a, i64 %b, i64 %idx
  ret <2 x i64> %vecins
}

; Doubleword immediate

; Inserts the i64 %b into <2 x i64> %a at the constant element 1.
; The pwr10 runs expect a single vinsd (immediate 0 on LE, 8 on BE); the pwr9
; run expects mtfprd followed by xxmrghd.
define <2 x i64> @testDoublewordImm(<2 x i64> %a, i64 %b) {
; CHECK-LABEL: testDoublewordImm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vinsd v2, r5, 0
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDoublewordImm:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vinsd v2, r5, 8
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDoublewordImm:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    mtfprd f0, r5
; CHECK-P9-NEXT:    xxmrghd v2, v2, vs0
; CHECK-P9-NEXT:    blr
entry:
  %vecins = insertelement <2 x i64> %a, i64 %b, i32 1
  ret <2 x i64> %vecins
}
; Inserts the i64 %b into <2 x i64> %a at the constant element 0.
; The pwr10 runs expect a single vinsd (immediate 8 on LE, 0 on BE); the pwr9
; run expects mtfprd followed by xxpermdi.
define <2 x i64> @testDoublewordImm2(<2 x i64> %a, i64 %b) {
; CHECK-LABEL: testDoublewordImm2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vinsd v2, r5, 8
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDoublewordImm2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vinsd v2, r5, 0
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDoublewordImm2:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    mtfprd f0, r5
; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
; CHECK-P9-NEXT:    blr
entry:
  %vecins = insertelement <2 x i64> %a, i64 %b, i32 0
  ret <2 x i64> %vecins
}

; Float indexed

; Inserts the float argument %b into <4 x float> %a at the run-time i32 index
; %idx1. The pwr10 runs expect the value to be moved to a GPR (xscvdpspn +
; mffprwz) and inserted with vinswrx (LE) / vinswlx (BE); the pwr9 run expects
; a stack round trip with stfsx.
define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
; CHECK-LABEL: testFloat1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xscvdpspn vs0, f1
; CHECK-NEXT:    extsw r4, r6
; CHECK-NEXT:    slwi r4, r4, 2
; CHECK-NEXT:    mffprwz r3, f0
; CHECK-NEXT:    vinswrx v2, r4, r3
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testFloat1:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xscvdpspn vs0, f1
; CHECK-BE-NEXT:    extsw r4, r6
; CHECK-BE-NEXT:    slwi r4, r4, 2
; CHECK-BE-NEXT:    mffprwz r3, f0
; CHECK-BE-NEXT:    vinswlx v2, r4, r3
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testFloat1:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    addi r4, r1, -16
; CHECK-P9-NEXT:    rlwinm r3, r6, 2, 28, 29
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    stfsx f1, r4, r3
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
entry:
  %vecins = insertelement <4 x float> %a, float %b, i32 %idx1
  ret <4 x float> %vecins
}
; Loads two floats from %b at byte offsets 0 and 1 and inserts them into
; <4 x float> %a at the run-time indices %idx1 and %idx2. The pwr10 runs expect
; each value to be loaded with lwz and inserted with vinswrx (LE) / vinswlx
; (BE); the pwr9 run expects two stack round trips with stwx.
define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-LABEL: testFloat2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lwz r3, 0(r5)
; CHECK-NEXT:    extsw r4, r6
; CHECK-NEXT:    slwi r4, r4, 2
; CHECK-NEXT:    vinswrx v2, r4, r3
; CHECK-NEXT:    lwz r3, 1(r5)
; CHECK-NEXT:    extsw r4, r7
; CHECK-NEXT:    slwi r4, r4, 2
; CHECK-NEXT:    vinswrx v2, r4, r3
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testFloat2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lwz r3, 0(r5)
; CHECK-BE-NEXT:    extsw r4, r6
; CHECK-BE-NEXT:    slwi r4, r4, 2
; CHECK-BE-NEXT:    vinswlx v2, r4, r3
; CHECK-BE-NEXT:    lwz r3, 1(r5)
; CHECK-BE-NEXT:    extsw r4, r7
; CHECK-BE-NEXT:    slwi r4, r4, 2
; CHECK-BE-NEXT:    vinswlx v2, r4, r3
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testFloat2:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lwz r3, 0(r5)
; CHECK-P9-NEXT:    rlwinm r4, r6, 2, 28, 29
; CHECK-P9-NEXT:    addi r6, r1, -32
; CHECK-P9-NEXT:    stxv v2, -32(r1)
; CHECK-P9-NEXT:    stwx r3, r6, r4
; CHECK-P9-NEXT:    rlwinm r4, r7, 2, 28, 29
; CHECK-P9-NEXT:    lxv vs0, -32(r1)
; CHECK-P9-NEXT:    lwz r3, 1(r5)
; CHECK-P9-NEXT:    addi r5, r1, -16
; CHECK-P9-NEXT:    stxv vs0, -16(r1)
; CHECK-P9-NEXT:    stwx r3, r5, r4
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
entry:
  %0 = bitcast i8* %b to float*
  %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1
  %1 = bitcast i8* %add.ptr1 to float*
  %2 = load float, float* %0, align 4
  %vecins = insertelement <4 x float> %a, float %2, i32 %idx1
  %3 = load float, float* %1, align 4
  %vecins2 = insertelement <4 x float> %vecins, float %3, i32 %idx2
  ret <4 x float> %vecins2
}
; Same shape as a two-float indexed insert, but the floats are loaded from %b
; at byte offsets 65536 and 68719476736. The pwr10 runs expect a prefixed plwz
; for the first load and an li + rldic + lwzx sequence for the second; the pwr9
; run expects lis/lwzx for the first load and stack round trips for the inserts.
define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-LABEL: testFloat3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    plwz r3, 65536(r5), 0
; CHECK-NEXT:    extsw r4, r6
; CHECK-NEXT:    slwi r4, r4, 2
; CHECK-NEXT:    vinswrx v2, r4, r3
; CHECK-NEXT:    li r3, 1
; CHECK-NEXT:    extsw r4, r7
; CHECK-NEXT:    rldic r3, r3, 36, 27
; CHECK-NEXT:    slwi r4, r4, 2
; CHECK-NEXT:    lwzx r3, r5, r3
; CHECK-NEXT:    vinswrx v2, r4, r3
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testFloat3:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    plwz r3, 65536(r5), 0
; CHECK-BE-NEXT:    extsw r4, r6
; CHECK-BE-NEXT:    slwi r4, r4, 2
; CHECK-BE-NEXT:    vinswlx v2, r4, r3
; CHECK-BE-NEXT:    li r3, 1
; CHECK-BE-NEXT:    extsw r4, r7
; CHECK-BE-NEXT:    rldic r3, r3, 36, 27
; CHECK-BE-NEXT:    slwi r4, r4, 2
; CHECK-BE-NEXT:    lwzx r3, r5, r3
; CHECK-BE-NEXT:    vinswlx v2, r4, r3
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testFloat3:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lis r3, 1
; CHECK-P9-NEXT:    rlwinm r4, r6, 2, 28, 29
; CHECK-P9-NEXT:    addi r6, r1, -32
; CHECK-P9-NEXT:    lwzx r3, r5, r3
; CHECK-P9-NEXT:    stxv v2, -32(r1)
; CHECK-P9-NEXT:    stwx r3, r6, r4
; CHECK-P9-NEXT:    li r3, 1
; CHECK-P9-NEXT:    rlwinm r4, r7, 2, 28, 29
; CHECK-P9-NEXT:    lxv vs0, -32(r1)
; CHECK-P9-NEXT:    rldic r3, r3, 36, 27
; CHECK-P9-NEXT:    lwzx r3, r5, r3
; CHECK-P9-NEXT:    addi r5, r1, -16
; CHECK-P9-NEXT:    stxv vs0, -16(r1)
; CHECK-P9-NEXT:    stwx r3, r5, r4
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
entry:
  %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
  %0 = bitcast i8* %add.ptr to float*
  %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 68719476736
  %1 = bitcast i8* %add.ptr1 to float*
  %2 = load float, float* %0, align 4
  %vecins = insertelement <4 x float> %a, float %2, i32 %idx1
  %3 = load float, float* %1, align 4
  %vecins2 = insertelement <4 x float> %vecins, float %3, i32 %idx2
  ret <4 x float> %vecins2
}

; Float immediate

; Inserts the float argument %b into <4 x float> %a at the constant elements 0
; and 2. All three runs expect xscvdpspn followed by two xxinsertw instructions
; (immediates 12 and 4 on LE, 0 and 8 on the BE runs).
define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
; CHECK-LABEL: testFloatImm1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xscvdpspn vs0, f1
; CHECK-NEXT:    xxinsertw v2, vs0, 12
; CHECK-NEXT:    xxinsertw v2, vs0, 4
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testFloatImm1:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xscvdpspn vs0, f1
; CHECK-BE-NEXT:    xxinsertw v2, vs0, 0
; CHECK-BE-NEXT:    xxinsertw v2, vs0, 8
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testFloatImm1:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    xscvdpspn vs0, f1
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 0
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 8
; CHECK-P9-NEXT:    blr
entry:
  %vecins = insertelement <4 x float> %a, float %b, i32 0
  %vecins1 = insertelement <4 x float> %vecins, float %b, i32 2
  ret <4 x float> %vecins1
}
; Loads two floats through the i32* %b (elements 0 and 1) and inserts them into
; <4 x float> %a at the constant elements 0 and 2. The pwr10 runs expect lwz +
; vinsw per value; the pwr9 run expects lfs + xscvdpspn + xxinsertw per value.
define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
; CHECK-LABEL: testFloatImm2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lwz r3, 0(r5)
; CHECK-NEXT:    vinsw v2, r3, 12
; CHECK-NEXT:    lwz r3, 4(r5)
; CHECK-NEXT:    vinsw v2, r3, 4
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testFloatImm2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lwz r3, 0(r5)
; CHECK-BE-NEXT:    vinsw v2, r3, 0
; CHECK-BE-NEXT:    lwz r3, 4(r5)
; CHECK-BE-NEXT:    vinsw v2, r3, 8
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testFloatImm2:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lfs f0, 0(r5)
; CHECK-P9-NEXT:    xscvdpspn vs0, f0
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 0
; CHECK-P9-NEXT:    lfs f0, 4(r5)
; CHECK-P9-NEXT:    xscvdpspn vs0, f0
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 8
; CHECK-P9-NEXT:    blr
entry:
  %0 = bitcast i32* %b to float*
  %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 1
  %1 = bitcast i32* %add.ptr1 to float*
  %2 = load float, float* %0, align 4
  %vecins = insertelement <4 x float> %a, float %2, i32 0
  %3 = load float, float* %1, align 4
  %vecins2 = insertelement <4 x float> %vecins, float %3, i32 2
  ret <4 x float> %vecins2
}
; Loads two floats through the i32* %b at element offsets 65536 and 68719476736
; and inserts them into <4 x float> %a at the constant elements 0 and 2. The
; pwr10 runs expect a prefixed plwz for the first load and li + rldic + lwzx
; for the second, each followed by vinsw; the pwr9 run expects lfsx loads
; followed by xscvdpspn + xxinsertw.
define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
; CHECK-LABEL: testFloatImm3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    plwz r3, 262144(r5), 0
; CHECK-NEXT:    vinsw v2, r3, 12
; CHECK-NEXT:    li r3, 1
; CHECK-NEXT:    rldic r3, r3, 38, 25
; CHECK-NEXT:    lwzx r3, r5, r3
; CHECK-NEXT:    vinsw v2, r3, 4
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testFloatImm3:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    plwz r3, 262144(r5), 0
; CHECK-BE-NEXT:    vinsw v2, r3, 0
; CHECK-BE-NEXT:    li r3, 1
; CHECK-BE-NEXT:    rldic r3, r3, 38, 25
; CHECK-BE-NEXT:    lwzx r3, r5, r3
; CHECK-BE-NEXT:    vinsw v2, r3, 8
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testFloatImm3:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lis r3, 4
; CHECK-P9-NEXT:    lfsx f0, r5, r3
; CHECK-P9-NEXT:    li r3, 1
; CHECK-P9-NEXT:    rldic r3, r3, 38, 25
; CHECK-P9-NEXT:    xscvdpspn vs0, f0
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 0
; CHECK-P9-NEXT:    lfsx f0, r5, r3
; CHECK-P9-NEXT:    xscvdpspn vs0, f0
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 8
; CHECK-P9-NEXT:    blr
entry:
  %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536
  %0 = bitcast i32* %add.ptr to float*
  %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 68719476736
  %1 = bitcast i32* %add.ptr1 to float*
  %2 = load float, float* %0, align 4
  %vecins = insertelement <4 x float> %a, float %2, i32 0
  %3 = load float, float* %1, align 4
  %vecins2 = insertelement <4 x float> %vecins, float %3, i32 2
  ret <4 x float> %vecins2
}

; Double indexed

; Inserts the double argument %b into <2 x double> %a at the run-time i32 index
; %idx1. The pwr10 runs expect the value to be moved to a GPR with mffprd and
; inserted with vinsdrx (LE) / vinsdlx (BE); the pwr9 run expects a stack round
; trip with stfdx.
define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) {
; CHECK-LABEL: testDouble1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    extsw r4, r6
; CHECK-NEXT:    mffprd r3, f1
; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-NEXT:    vinsdrx v2, r4, r3
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDouble1:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    extsw r4, r6
; CHECK-BE-NEXT:    mffprd r3, f1
; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDouble1:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    addi r4, r1, -16
; CHECK-P9-NEXT:    rlwinm r3, r6, 3, 28, 28
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    stfdx f1, r4, r3
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
entry:
  %vecins = insertelement <2 x double> %a, double %b, i32 %idx1
  ret <2 x double> %vecins
}
; Loads two doubles from %b at byte offsets 0 and 1 and inserts them into
; <2 x double> %a at the run-time indices %idx1 and %idx2. The pwr10 runs
; expect ld for the first load and a prefixed pld for the offset-1 load, each
; followed by vinsdrx (LE) / vinsdlx (BE); the pwr9 run expects ld / ldx loads
; and two stack round trips with stdx.
define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-LABEL: testDouble2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    ld r3, 0(r5)
; CHECK-NEXT:    extsw r4, r6
; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-NEXT:    vinsdrx v2, r4, r3
; CHECK-NEXT:    pld r3, 1(r5), 0
; CHECK-NEXT:    extsw r4, r7
; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-NEXT:    vinsdrx v2, r4, r3
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDouble2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    ld r3, 0(r5)
; CHECK-BE-NEXT:    extsw r4, r6
; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
; CHECK-BE-NEXT:    pld r3, 1(r5), 0
; CHECK-BE-NEXT:    extsw r4, r7
; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDouble2:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    ld r3, 0(r5)
; CHECK-P9-NEXT:    rlwinm r4, r6, 3, 28, 28
; CHECK-P9-NEXT:    addi r6, r1, -32
; CHECK-P9-NEXT:    stxv v2, -32(r1)
; CHECK-P9-NEXT:    stdx r3, r6, r4
; CHECK-P9-NEXT:    li r3, 1
; CHECK-P9-NEXT:    rlwinm r4, r7, 3, 28, 28
; CHECK-P9-NEXT:    lxv vs0, -32(r1)
; CHECK-P9-NEXT:    ldx r3, r5, r3
; CHECK-P9-NEXT:    addi r5, r1, -16
; CHECK-P9-NEXT:    stxv vs0, -16(r1)
; CHECK-P9-NEXT:    stdx r3, r5, r4
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
entry:
  %0 = bitcast i8* %b to double*
  %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1
  %1 = bitcast i8* %add.ptr1 to double*
  %2 = load double, double* %0, align 8
  %vecins = insertelement <2 x double> %a, double %2, i32 %idx1
  %3 = load double, double* %1, align 8
  %vecins2 = insertelement <2 x double> %vecins, double %3, i32 %idx2
  ret <2 x double> %vecins2
}
; Loads two doubles from %b at byte offsets 65536 and 68719476736 and inserts
; them into <2 x double> %a at the run-time indices %idx1 and %idx2. The pwr10
; runs expect a prefixed pld for the first load and li + rldic + ldx for the
; second, each followed by vinsdrx (LE) / vinsdlx (BE); the pwr9 run expects
; lis/ldx and li/rldic/ldx loads with two stack round trips.
define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-LABEL: testDouble3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    pld r3, 65536(r5), 0
; CHECK-NEXT:    extsw r4, r6
; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-NEXT:    vinsdrx v2, r4, r3
; CHECK-NEXT:    li r3, 1
; CHECK-NEXT:    extsw r4, r7
; CHECK-NEXT:    rldic r3, r3, 36, 27
; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-NEXT:    ldx r3, r5, r3
; CHECK-NEXT:    vinsdrx v2, r4, r3
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDouble3:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    pld r3, 65536(r5), 0
; CHECK-BE-NEXT:    extsw r4, r6
; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
; CHECK-BE-NEXT:    li r3, 1
; CHECK-BE-NEXT:    extsw r4, r7
; CHECK-BE-NEXT:    rldic r3, r3, 36, 27
; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-BE-NEXT:    ldx r3, r5, r3
; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDouble3:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lis r3, 1
; CHECK-P9-NEXT:    rlwinm r4, r6, 3, 28, 28
; CHECK-P9-NEXT:    addi r6, r1, -32
; CHECK-P9-NEXT:    ldx r3, r5, r3
; CHECK-P9-NEXT:    stxv v2, -32(r1)
; CHECK-P9-NEXT:    stdx r3, r6, r4
; CHECK-P9-NEXT:    li r3, 1
; CHECK-P9-NEXT:    rlwinm r4, r7, 3, 28, 28
; CHECK-P9-NEXT:    lxv vs0, -32(r1)
; CHECK-P9-NEXT:    rldic r3, r3, 36, 27
; CHECK-P9-NEXT:    ldx r3, r5, r3
; CHECK-P9-NEXT:    addi r5, r1, -16
; CHECK-P9-NEXT:    stxv vs0, -16(r1)
; CHECK-P9-NEXT:    stdx r3, r5, r4
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
entry:
  %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
  %0 = bitcast i8* %add.ptr to double*
  %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 68719476736
  %1 = bitcast i8* %add.ptr1 to double*
  %2 = load double, double* %0, align 8
  %vecins = insertelement <2 x double> %a, double %2, i32 %idx1
  %3 = load double, double* %1, align 8
  %vecins2 = insertelement <2 x double> %vecins, double %3, i32 %idx2
  ret <2 x double> %vecins2
}

; Double immediate

; Inserts the double argument %b into <2 x double> %a at the constant element 0.
; All runs expect a single permute: xxmrghd on the LE run, xxpermdi on the two
; BE runs.
define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
; CHECK-LABEL: testDoubleImm1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-NEXT:    xxmrghd v2, v2, vs1
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDoubleImm1:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-BE-NEXT:    xxpermdi v2, vs1, v2, 1
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDoubleImm1:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
; CHECK-P9-NEXT:    xxpermdi v2, vs1, v2, 1
; CHECK-P9-NEXT:    blr
entry:
  %vecins = insertelement <2 x double> %a, double %b, i32 0
  ret <2 x double> %vecins
}
; Loads a double through the i32* %b (offset 0) and inserts it into
; <2 x double> %a at the constant element 0. All runs expect lfd followed by a
; single permute (xxmrghd on LE, xxpermdi on the BE runs).
define <2 x double> @testDoubleImm2(<2 x double> %a, i32* %b) {
; CHECK-LABEL: testDoubleImm2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lfd f0, 0(r5)
; CHECK-NEXT:    xxmrghd v2, v2, vs0
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDoubleImm2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lfd f0, 0(r5)
; CHECK-BE-NEXT:    xxpermdi v2, vs0, v2, 1
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDoubleImm2:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lfd f0, 0(r5)
; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
; CHECK-P9-NEXT:    blr
entry:
  %0 = bitcast i32* %b to double*
  %1 = load double, double* %0, align 8
  %vecins = insertelement <2 x double> %a, double %1, i32 0
  ret <2 x double> %vecins
}
; Loads a double through the i32* %b at element offset 1 (byte displacement 4
; in the expected lfd) and inserts it into <2 x double> %a at the constant
; element 0. All runs expect lfd followed by a single permute.
define <2 x double> @testDoubleImm3(<2 x double> %a, i32* %b) {
; CHECK-LABEL: testDoubleImm3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lfd f0, 4(r5)
; CHECK-NEXT:    xxmrghd v2, v2, vs0
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDoubleImm3:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lfd f0, 4(r5)
; CHECK-BE-NEXT:    xxpermdi v2, vs0, v2, 1
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDoubleImm3:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lfd f0, 4(r5)
; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
; CHECK-P9-NEXT:    blr
entry:
  %add.ptr = getelementptr inbounds i32, i32* %b, i64 1
  %0 = bitcast i32* %add.ptr to double*
  %1 = load double, double* %0, align 8
  %vecins = insertelement <2 x double> %a, double %1, i32 0
  ret <2 x double> %vecins
}
; Loads a double through the i32* %b at element offset 65536 and inserts it
; into <2 x double> %a at the constant element 0. The pwr10 runs expect a
; prefixed plfd with displacement 262144; the pwr9 run expects lis + lfdx.
; Each load is followed by a single permute.
define <2 x double> @testDoubleImm4(<2 x double> %a, i32* %b) {
; CHECK-LABEL: testDoubleImm4:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    plfd f0, 262144(r5), 0
; CHECK-NEXT:    xxmrghd v2, v2, vs0
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDoubleImm4:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    plfd f0, 262144(r5), 0
; CHECK-BE-NEXT:    xxpermdi v2, vs0, v2, 1
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDoubleImm4:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lis r3, 4
; CHECK-P9-NEXT:    lfdx f0, r5, r3
; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
; CHECK-P9-NEXT:    blr
entry:
  %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536
  %0 = bitcast i32* %add.ptr to double*
  %1 = load double, double* %0, align 8
  %vecins = insertelement <2 x double> %a, double %1, i32 0
  ret <2 x double> %vecins
}
; Loads a double through the i32* %b at element offset 68719476736 and inserts
; it into <2 x double> %a at the constant element 0. All runs expect the offset
; to be built with li + rldic and the value loaded with lfdx, followed by a
; single permute.
define <2 x double> @testDoubleImm5(<2 x double> %a, i32* %b) {
; CHECK-LABEL: testDoubleImm5:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    li r3, 1
; CHECK-NEXT:    rldic r3, r3, 38, 25
; CHECK-NEXT:    lfdx f0, r5, r3
; CHECK-NEXT:    xxmrghd v2, v2, vs0
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDoubleImm5:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    li r3, 1
; CHECK-BE-NEXT:    rldic r3, r3, 38, 25
; CHECK-BE-NEXT:    lfdx f0, r5, r3
; CHECK-BE-NEXT:    xxpermdi v2, vs0, v2, 1
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDoubleImm5:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    li r3, 1
; CHECK-P9-NEXT:    rldic r3, r3, 38, 25
; CHECK-P9-NEXT:    lfdx f0, r5, r3
; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
; CHECK-P9-NEXT:    blr
entry:
  %add.ptr = getelementptr inbounds i32, i32* %b, i64 68719476736
  %0 = bitcast i32* %add.ptr to double*
  %1 = load double, double* %0, align 8
  %vecins = insertelement <2 x double> %a, double %1, i32 0
  ret <2 x double> %vecins
}
; Truncates the double %b to float and inserts it into <4 x float> %a at the
; constant element 1. All runs expect xscvdpsp followed by a single xxinsertw
; (immediate 8 on LE, 4 on the BE runs).
; NOTE(review): attribute group #0 is not defined in the visible part of this
; file -- confirm it is declared elsewhere or drop the reference.
define dso_local <4 x float> @testInsertDoubleToFloat(<4 x float> %a, double %b) local_unnamed_addr #0 {
; CHECK-LABEL: testInsertDoubleToFloat:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xscvdpsp f0, f1
; CHECK-NEXT:    xxinsertw v2, vs0, 8
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testInsertDoubleToFloat:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xscvdpsp f0, f1
; CHECK-BE-NEXT:    xxinsertw v2, vs0, 4
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testInsertDoubleToFloat:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    xscvdpsp f0, f1
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 4
; CHECK-P9-NEXT:    blr
entry:
  %conv = fptrunc double %b to float
  %vecins = insertelement <4 x float> %a, float %conv, i32 1
  ret <4 x float> %vecins
}