1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
3; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
4; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
5; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
6; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
7; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
8; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
9; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names \
10; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-P9
11
12; Byte indexed
13
; testByte: truncates %b to i8 and inserts it into %a at the run-time index
; %idx. The pwr10 runs expect a single vinsbrx (little-endian) or vinsblx
; (big-endian); the pwr9 run expects the vector to be stored to the stack,
; patched with stbx at the masked index, and reloaded.
14define <16 x i8> @testByte(<16 x i8> %a, i64 %b, i64 %idx) {
15; CHECK-LABEL: testByte:
16; CHECK:       # %bb.0: # %entry
17; CHECK-NEXT:    vinsbrx v2, r6, r5
18; CHECK-NEXT:    blr
19;
20; CHECK-BE-LABEL: testByte:
21; CHECK-BE:       # %bb.0: # %entry
22; CHECK-BE-NEXT:    vinsblx v2, r6, r5
23; CHECK-BE-NEXT:    blr
24;
25; CHECK-P9-LABEL: testByte:
26; CHECK-P9:       # %bb.0: # %entry
27; CHECK-P9-NEXT:    addi r4, r1, -16
28; CHECK-P9-NEXT:    clrldi r3, r6, 60
29; CHECK-P9-NEXT:    stxv v2, -16(r1)
30; CHECK-P9-NEXT:    stbx r5, r4, r3
31; CHECK-P9-NEXT:    lxv v2, -16(r1)
32; CHECK-P9-NEXT:    blr
33entry:
34  %conv = trunc i64 %b to i8 ; narrow the i64 argument to the element type
35  %vecins = insertelement <16 x i8> %a, i8 %conv, i64 %idx ; %idx is a function argument, not a constant
36  ret <16 x i8> %vecins
37}
38
39; Halfword indexed
40
; testHalf: truncates %b to i16 and inserts it into %a at the run-time index
; %idx. The pwr10 runs expect the index to be shifted left by 1 and then a
; vinshrx (little-endian) or vinshlx (big-endian); the pwr9 run expects a
; store of the vector to the stack, an sthx into it, and a reload.
41define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) {
42; CHECK-LABEL: testHalf:
43; CHECK:       # %bb.0: # %entry
44; CHECK-NEXT:    slwi r3, r6, 1
45; CHECK-NEXT:    vinshrx v2, r3, r5
46; CHECK-NEXT:    blr
47;
48; CHECK-BE-LABEL: testHalf:
49; CHECK-BE:       # %bb.0: # %entry
50; CHECK-BE-NEXT:    slwi r3, r6, 1
51; CHECK-BE-NEXT:    vinshlx v2, r3, r5
52; CHECK-BE-NEXT:    blr
53;
54; CHECK-P9-LABEL: testHalf:
55; CHECK-P9:       # %bb.0: # %entry
56; CHECK-P9-NEXT:    addi r4, r1, -16
57; CHECK-P9-NEXT:    rlwinm r3, r6, 1, 28, 30
58; CHECK-P9-NEXT:    stxv v2, -16(r1)
59; CHECK-P9-NEXT:    sthx r5, r4, r3
60; CHECK-P9-NEXT:    lxv v2, -16(r1)
61; CHECK-P9-NEXT:    blr
62entry:
63  %conv = trunc i64 %b to i16 ; narrow the i64 argument to the element type
64  %vecins = insertelement <8 x i16> %a, i16 %conv, i64 %idx ; %idx is a function argument, not a constant
65  ret <8 x i16> %vecins
66}
67
68; Word indexed
69
; testWord: truncates %b to i32 and inserts it into %a at the run-time index
; %idx. The pwr10 runs expect the index to be shifted left by 2 and then a
; vinswrx (little-endian) or vinswlx (big-endian); the pwr9 run expects a
; store of the vector to the stack, an stwx into it, and a reload.
70define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) {
71; CHECK-LABEL: testWord:
72; CHECK:       # %bb.0: # %entry
73; CHECK-NEXT:    slwi r3, r6, 2
74; CHECK-NEXT:    vinswrx v2, r3, r5
75; CHECK-NEXT:    blr
76;
77; CHECK-BE-LABEL: testWord:
78; CHECK-BE:       # %bb.0: # %entry
79; CHECK-BE-NEXT:    slwi r3, r6, 2
80; CHECK-BE-NEXT:    vinswlx v2, r3, r5
81; CHECK-BE-NEXT:    blr
82;
83; CHECK-P9-LABEL: testWord:
84; CHECK-P9:       # %bb.0: # %entry
85; CHECK-P9-NEXT:    addi r4, r1, -16
86; CHECK-P9-NEXT:    rlwinm r3, r6, 2, 28, 29
87; CHECK-P9-NEXT:    stxv v2, -16(r1)
88; CHECK-P9-NEXT:    stwx r5, r4, r3
89; CHECK-P9-NEXT:    lxv v2, -16(r1)
90; CHECK-P9-NEXT:    blr
91entry:
92  %conv = trunc i64 %b to i32 ; narrow the i64 argument to the element type
93  %vecins = insertelement <4 x i32> %a, i32 %conv, i64 %idx ; %idx is a function argument, not a constant
94  ret <4 x i32> %vecins
95}
96
97; Word immediate
98
; testWordImm: truncates %b to i32 and inserts the same value at the constant
; element indices 1 and 3. The pwr10 runs expect two vinsw instructions with
; immediates 8 and 0 (little-endian) or 4 and 12 (big-endian); the pwr9 run
; expects mtfprwz followed by two xxinsertw with immediates 4 and 12.
99define <4 x i32> @testWordImm(<4 x i32> %a, i64 %b) {
100; CHECK-LABEL: testWordImm:
101; CHECK:       # %bb.0: # %entry
102; CHECK-NEXT:    vinsw v2, r5, 8
103; CHECK-NEXT:    vinsw v2, r5, 0
104; CHECK-NEXT:    blr
105;
106; CHECK-BE-LABEL: testWordImm:
107; CHECK-BE:       # %bb.0: # %entry
108; CHECK-BE-NEXT:    vinsw v2, r5, 4
109; CHECK-BE-NEXT:    vinsw v2, r5, 12
110; CHECK-BE-NEXT:    blr
111;
112; CHECK-P9-LABEL: testWordImm:
113; CHECK-P9:       # %bb.0: # %entry
114; CHECK-P9-NEXT:    mtfprwz f0, r5
115; CHECK-P9-NEXT:    xxinsertw v2, vs0, 4
116; CHECK-P9-NEXT:    xxinsertw v2, vs0, 12
117; CHECK-P9-NEXT:    blr
118entry:
119  %conv = trunc i64 %b to i32 ; narrow the i64 argument to the element type
120  %vecins = insertelement <4 x i32> %a, i32 %conv, i32 1 ; first insert: element 1
121  %vecins2 = insertelement <4 x i32> %vecins, i32 %conv, i32 3 ; second insert of the same value: element 3
122  ret <4 x i32> %vecins2
123}
124
125; Doubleword indexed
126
; testDoubleword: inserts the i64 %b into %a at the run-time index %idx (no
; truncation is needed). The pwr10 runs expect an rlwinm that scales the index
; by 8 followed by vinsdrx (little-endian) or vinsdlx (big-endian); the pwr9
; run expects a store of the vector to the stack, an stdx into it, and a
; reload.
127define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
128; CHECK-LABEL: testDoubleword:
129; CHECK:       # %bb.0: # %entry
130; CHECK-NEXT:    rlwinm r3, r6, 3, 0, 28
131; CHECK-NEXT:    vinsdrx v2, r3, r5
132; CHECK-NEXT:    blr
133;
134; CHECK-BE-LABEL: testDoubleword:
135; CHECK-BE:       # %bb.0: # %entry
136; CHECK-BE-NEXT:    rlwinm r3, r6, 3, 0, 28
137; CHECK-BE-NEXT:    vinsdlx v2, r3, r5
138; CHECK-BE-NEXT:    blr
139;
140; CHECK-P9-LABEL: testDoubleword:
141; CHECK-P9:       # %bb.0: # %entry
142; CHECK-P9-NEXT:    addi r4, r1, -16
143; CHECK-P9-NEXT:    rlwinm r3, r6, 3, 28, 28
144; CHECK-P9-NEXT:    stxv v2, -16(r1)
145; CHECK-P9-NEXT:    stdx r5, r4, r3
146; CHECK-P9-NEXT:    lxv v2, -16(r1)
147; CHECK-P9-NEXT:    blr
148entry:
149  %vecins = insertelement <2 x i64> %a, i64 %b, i64 %idx ; %idx is a function argument, not a constant
150  ret <2 x i64> %vecins
151}
152
153; Doubleword immediate
154
; testDoublewordImm: inserts the i64 %b into %a at the constant element
; index 1. The pwr10 runs expect a single vinsd with immediate 0
; (little-endian) or 8 (big-endian); the pwr9 run expects mtfprd followed by
; xxmrghd.
155define <2 x i64> @testDoublewordImm(<2 x i64> %a, i64 %b) {
156; CHECK-LABEL: testDoublewordImm:
157; CHECK:       # %bb.0: # %entry
158; CHECK-NEXT:    vinsd v2, r5, 0
159; CHECK-NEXT:    blr
160;
161; CHECK-BE-LABEL: testDoublewordImm:
162; CHECK-BE:       # %bb.0: # %entry
163; CHECK-BE-NEXT:    vinsd v2, r5, 8
164; CHECK-BE-NEXT:    blr
165;
166; CHECK-P9-LABEL: testDoublewordImm:
167; CHECK-P9:       # %bb.0: # %entry
168; CHECK-P9-NEXT:    mtfprd f0, r5
169; CHECK-P9-NEXT:    xxmrghd v2, v2, vs0
170; CHECK-P9-NEXT:    blr
171entry:
172  %vecins = insertelement <2 x i64> %a, i64 %b, i32 1 ; constant index: element 1
173  ret <2 x i64> %vecins
174}
175
; testDoublewordImm2: inserts the i64 %b into %a at the constant element
; index 0 (the counterpart of testDoublewordImm, which uses index 1). The
; pwr10 runs expect a single vinsd with immediate 8 (little-endian) or 0
; (big-endian); the pwr9 run expects mtfprd followed by xxpermdi.
176define <2 x i64> @testDoublewordImm2(<2 x i64> %a, i64 %b) {
177; CHECK-LABEL: testDoublewordImm2:
178; CHECK:       # %bb.0: # %entry
179; CHECK-NEXT:    vinsd v2, r5, 8
180; CHECK-NEXT:    blr
181;
182; CHECK-BE-LABEL: testDoublewordImm2:
183; CHECK-BE:       # %bb.0: # %entry
184; CHECK-BE-NEXT:    vinsd v2, r5, 0
185; CHECK-BE-NEXT:    blr
186;
187; CHECK-P9-LABEL: testDoublewordImm2:
188; CHECK-P9:       # %bb.0: # %entry
189; CHECK-P9-NEXT:    mtfprd f0, r5
190; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
191; CHECK-P9-NEXT:    blr
192entry:
193  %vecins = insertelement <2 x i64> %a, i64 %b, i32 0 ; constant index: element 0
194  ret <2 x i64> %vecins
195}
196
197; Float indexed
198
; testFloat1: inserts the float argument %b into %a at the run-time i32 index
; %idx1. The pwr10 runs expect xscvdpspn into a vector register, the index
; scaled by 4, and then vinswvrx (little-endian) or vinswvlx (big-endian); the
; pwr9 run expects a store of the vector to the stack, an stfsx into it, and
; a reload.
199define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
200; CHECK-LABEL: testFloat1:
201; CHECK:       # %bb.0: # %entry
202; CHECK-NEXT:    xscvdpspn v3, f1
203; CHECK-NEXT:    extsw r3, r6
204; CHECK-NEXT:    slwi r3, r3, 2
205; CHECK-NEXT:    vinswvrx v2, r3, v3
206; CHECK-NEXT:    blr
207;
208; CHECK-BE-LABEL: testFloat1:
209; CHECK-BE:       # %bb.0: # %entry
210; CHECK-BE-NEXT:    xscvdpspn v3, f1
211; CHECK-BE-NEXT:    extsw r3, r6
212; CHECK-BE-NEXT:    slwi r3, r3, 2
213; CHECK-BE-NEXT:    vinswvlx v2, r3, v3
214; CHECK-BE-NEXT:    blr
215;
216; CHECK-P9-LABEL: testFloat1:
217; CHECK-P9:       # %bb.0: # %entry
218; CHECK-P9-NEXT:    addi r4, r1, -16
219; CHECK-P9-NEXT:    rlwinm r3, r6, 2, 28, 29
220; CHECK-P9-NEXT:    stxv v2, -16(r1)
221; CHECK-P9-NEXT:    stfsx f1, r4, r3
222; CHECK-P9-NEXT:    lxv v2, -16(r1)
223; CHECK-P9-NEXT:    blr
224entry:
225  %vecins = insertelement <4 x float> %a, float %b, i32 %idx1 ; %idx1 is a function argument, not a constant
226  ret <4 x float> %vecins
227}
228
; testFloat2: loads two floats, from %b and from %b + 1 byte, and inserts them
; into %a at the run-time indices %idx1 and %idx2 respectively. The pwr10 runs
; expect each value to be loaded with lwz (displacements 0 and 1) and inserted
; with vinswrx (little-endian) or vinswlx (big-endian); the pwr9 run expects
; two rounds of storing the vector to the stack, patching it with stwx, and
; reloading it.
229define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
230; CHECK-LABEL: testFloat2:
231; CHECK:       # %bb.0: # %entry
232; CHECK-NEXT:    lwz r3, 0(r5)
233; CHECK-NEXT:    extsw r4, r6
234; CHECK-NEXT:    slwi r4, r4, 2
235; CHECK-NEXT:    vinswrx v2, r4, r3
236; CHECK-NEXT:    lwz r3, 1(r5)
237; CHECK-NEXT:    extsw r4, r7
238; CHECK-NEXT:    slwi r4, r4, 2
239; CHECK-NEXT:    vinswrx v2, r4, r3
240; CHECK-NEXT:    blr
241;
242; CHECK-BE-LABEL: testFloat2:
243; CHECK-BE:       # %bb.0: # %entry
244; CHECK-BE-NEXT:    lwz r3, 0(r5)
245; CHECK-BE-NEXT:    extsw r4, r6
246; CHECK-BE-NEXT:    slwi r4, r4, 2
247; CHECK-BE-NEXT:    vinswlx v2, r4, r3
248; CHECK-BE-NEXT:    lwz r3, 1(r5)
249; CHECK-BE-NEXT:    extsw r4, r7
250; CHECK-BE-NEXT:    slwi r4, r4, 2
251; CHECK-BE-NEXT:    vinswlx v2, r4, r3
252; CHECK-BE-NEXT:    blr
253;
254; CHECK-P9-LABEL: testFloat2:
255; CHECK-P9:       # %bb.0: # %entry
256; CHECK-P9-NEXT:    lwz r3, 0(r5)
257; CHECK-P9-NEXT:    rlwinm r4, r6, 2, 28, 29
258; CHECK-P9-NEXT:    addi r6, r1, -32
259; CHECK-P9-NEXT:    stxv v2, -32(r1)
260; CHECK-P9-NEXT:    stwx r3, r6, r4
261; CHECK-P9-NEXT:    rlwinm r4, r7, 2, 28, 29
262; CHECK-P9-NEXT:    lxv vs0, -32(r1)
263; CHECK-P9-NEXT:    lwz r3, 1(r5)
264; CHECK-P9-NEXT:    addi r5, r1, -16
265; CHECK-P9-NEXT:    stxv vs0, -16(r1)
266; CHECK-P9-NEXT:    stwx r3, r5, r4
267; CHECK-P9-NEXT:    lxv v2, -16(r1)
268; CHECK-P9-NEXT:    blr
269entry:
270  %0 = bitcast i8* %b to float* ; first source address: %b itself
271  %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1 ; second source address: %b + 1 byte
272  %1 = bitcast i8* %add.ptr1 to float*
273  %2 = load float, float* %0, align 4
274  %vecins = insertelement <4 x float> %a, float %2, i32 %idx1
275  %3 = load float, float* %1, align 4 ; NOTE(review): %b and %b + 1 cannot both be 4-byte aligned; presumably the odd displacement is intentional - confirm
276  %vecins2 = insertelement <4 x float> %vecins, float %3, i32 %idx2
277  ret <4 x float> %vecins2
278}
279
; testFloat3: same shape as testFloat2 but with large byte offsets: the two
; floats are loaded from %b + 65536 and %b + 68719476736 (2^36) and inserted at
; the run-time indices %idx1 and %idx2. The pwr10 runs expect plwz with
; displacement 65536 for the first load and a materialized offset (li + rldic)
; with lwzx for the second, each followed by vinswrx (little-endian) or
; vinswlx (big-endian); the pwr9 run expects lis/lwzx and li/rldic/lwzx loads
; combined with the stack store / stwx / reload sequence.
280define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
281; CHECK-LABEL: testFloat3:
282; CHECK:       # %bb.0: # %entry
283; CHECK-NEXT:    plwz r3, 65536(r5), 0
284; CHECK-NEXT:    extsw r4, r6
285; CHECK-NEXT:    slwi r4, r4, 2
286; CHECK-NEXT:    vinswrx v2, r4, r3
287; CHECK-NEXT:    li r3, 1
288; CHECK-NEXT:    extsw r4, r7
289; CHECK-NEXT:    rldic r3, r3, 36, 27
290; CHECK-NEXT:    slwi r4, r4, 2
291; CHECK-NEXT:    lwzx r3, r5, r3
292; CHECK-NEXT:    vinswrx v2, r4, r3
293; CHECK-NEXT:    blr
294;
295; CHECK-BE-LABEL: testFloat3:
296; CHECK-BE:       # %bb.0: # %entry
297; CHECK-BE-NEXT:    plwz r3, 65536(r5), 0
298; CHECK-BE-NEXT:    extsw r4, r6
299; CHECK-BE-NEXT:    slwi r4, r4, 2
300; CHECK-BE-NEXT:    vinswlx v2, r4, r3
301; CHECK-BE-NEXT:    li r3, 1
302; CHECK-BE-NEXT:    extsw r4, r7
303; CHECK-BE-NEXT:    rldic r3, r3, 36, 27
304; CHECK-BE-NEXT:    slwi r4, r4, 2
305; CHECK-BE-NEXT:    lwzx r3, r5, r3
306; CHECK-BE-NEXT:    vinswlx v2, r4, r3
307; CHECK-BE-NEXT:    blr
308;
309; CHECK-P9-LABEL: testFloat3:
310; CHECK-P9:       # %bb.0: # %entry
311; CHECK-P9-NEXT:    lis r3, 1
312; CHECK-P9-NEXT:    rlwinm r4, r6, 2, 28, 29
313; CHECK-P9-NEXT:    addi r6, r1, -32
314; CHECK-P9-NEXT:    lwzx r3, r5, r3
315; CHECK-P9-NEXT:    stxv v2, -32(r1)
316; CHECK-P9-NEXT:    stwx r3, r6, r4
317; CHECK-P9-NEXT:    li r3, 1
318; CHECK-P9-NEXT:    rlwinm r4, r7, 2, 28, 29
319; CHECK-P9-NEXT:    lxv vs0, -32(r1)
320; CHECK-P9-NEXT:    rldic r3, r3, 36, 27
321; CHECK-P9-NEXT:    lwzx r3, r5, r3
322; CHECK-P9-NEXT:    addi r5, r1, -16
323; CHECK-P9-NEXT:    stxv vs0, -16(r1)
324; CHECK-P9-NEXT:    stwx r3, r5, r4
325; CHECK-P9-NEXT:    lxv v2, -16(r1)
326; CHECK-P9-NEXT:    blr
327entry:
328  %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 ; first source address: %b + 65536 bytes
329  %0 = bitcast i8* %add.ptr to float*
330  %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 68719476736 ; second source address: %b + 2^36 bytes
331  %1 = bitcast i8* %add.ptr1 to float*
332  %2 = load float, float* %0, align 4
333  %vecins = insertelement <4 x float> %a, float %2, i32 %idx1
334  %3 = load float, float* %1, align 4
335  %vecins2 = insertelement <4 x float> %vecins, float %3, i32 %idx2
336  ret <4 x float> %vecins2
337}
338
339; Float immediate
340
; testFloatImm1: inserts the float argument %b into %a at the constant element
; indices 0 and 2. All three runs expect one xscvdpspn followed by two
; xxinsertw; the immediates are 12 and 4 for pwr10 little-endian and 0 and 8
; for both big-endian runs.
341define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
342; CHECK-LABEL: testFloatImm1:
343; CHECK:       # %bb.0: # %entry
344; CHECK-NEXT:    xscvdpspn vs0, f1
345; CHECK-NEXT:    xxinsertw v2, vs0, 12
346; CHECK-NEXT:    xxinsertw v2, vs0, 4
347; CHECK-NEXT:    blr
348;
349; CHECK-BE-LABEL: testFloatImm1:
350; CHECK-BE:       # %bb.0: # %entry
351; CHECK-BE-NEXT:    xscvdpspn vs0, f1
352; CHECK-BE-NEXT:    xxinsertw v2, vs0, 0
353; CHECK-BE-NEXT:    xxinsertw v2, vs0, 8
354; CHECK-BE-NEXT:    blr
355;
356; CHECK-P9-LABEL: testFloatImm1:
357; CHECK-P9:       # %bb.0: # %entry
358; CHECK-P9-NEXT:    xscvdpspn vs0, f1
359; CHECK-P9-NEXT:    xxinsertw v2, vs0, 0
360; CHECK-P9-NEXT:    xxinsertw v2, vs0, 8
361; CHECK-P9-NEXT:    blr
362entry:
363  %vecins = insertelement <4 x float> %a, float %b, i32 0 ; first insert: element 0
364  %vecins1 = insertelement <4 x float> %vecins, float %b, i32 2 ; second insert of the same value: element 2
365  ret <4 x float> %vecins1
366}
367
; testFloatImm2: loads two floats through the i32 pointer %b (elements 0 and
; 1) and inserts them into %a at the constant element indices 0 and 2. The
; pwr10 runs expect integer lwz loads (displacements 0 and 4) each followed by
; vinsw; the pwr9 run expects lfs + xscvdpspn + xxinsertw for each element.
368define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
369; CHECK-LABEL: testFloatImm2:
370; CHECK:       # %bb.0: # %entry
371; CHECK-NEXT:    lwz r3, 0(r5)
372; CHECK-NEXT:    vinsw v2, r3, 12
373; CHECK-NEXT:    lwz r3, 4(r5)
374; CHECK-NEXT:    vinsw v2, r3, 4
375; CHECK-NEXT:    blr
376;
377; CHECK-BE-LABEL: testFloatImm2:
378; CHECK-BE:       # %bb.0: # %entry
379; CHECK-BE-NEXT:    lwz r3, 0(r5)
380; CHECK-BE-NEXT:    vinsw v2, r3, 0
381; CHECK-BE-NEXT:    lwz r3, 4(r5)
382; CHECK-BE-NEXT:    vinsw v2, r3, 8
383; CHECK-BE-NEXT:    blr
384;
385; CHECK-P9-LABEL: testFloatImm2:
386; CHECK-P9:       # %bb.0: # %entry
387; CHECK-P9-NEXT:    lfs f0, 0(r5)
388; CHECK-P9-NEXT:    xscvdpspn vs0, f0
389; CHECK-P9-NEXT:    xxinsertw v2, vs0, 0
390; CHECK-P9-NEXT:    lfs f0, 4(r5)
391; CHECK-P9-NEXT:    xscvdpspn vs0, f0
392; CHECK-P9-NEXT:    xxinsertw v2, vs0, 8
393; CHECK-P9-NEXT:    blr
394entry:
395  %0 = bitcast i32* %b to float* ; first source address: %b itself
396  %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 1 ; second source address: one i32 element past %b
397  %1 = bitcast i32* %add.ptr1 to float*
398  %2 = load float, float* %0, align 4
399  %vecins = insertelement <4 x float> %a, float %2, i32 0
400  %3 = load float, float* %1, align 4
401  %vecins2 = insertelement <4 x float> %vecins, float %3, i32 2
402  ret <4 x float> %vecins2
403}
404
; testFloatImm3: same shape as testFloatImm2 but with large offsets: the two
; floats are loaded from i32 elements 65536 and 68719476736 (2^36) of %b and
; inserted at the constant element indices 0 and 2. The pwr10 runs expect plwz
; with displacement 262144 for the first load and li + rldic (shift 38) +
; lwzx for the second, each followed by vinsw; the pwr9 run expects lis/lfsx
; and li/rldic/lfsx loads, each followed by xscvdpspn + xxinsertw.
405define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
406; CHECK-LABEL: testFloatImm3:
407; CHECK:       # %bb.0: # %entry
408; CHECK-NEXT:    plwz r3, 262144(r5), 0
409; CHECK-NEXT:    vinsw v2, r3, 12
410; CHECK-NEXT:    li r3, 1
411; CHECK-NEXT:    rldic r3, r3, 38, 25
412; CHECK-NEXT:    lwzx r3, r5, r3
413; CHECK-NEXT:    vinsw v2, r3, 4
414; CHECK-NEXT:    blr
415;
416; CHECK-BE-LABEL: testFloatImm3:
417; CHECK-BE:       # %bb.0: # %entry
418; CHECK-BE-NEXT:    plwz r3, 262144(r5), 0
419; CHECK-BE-NEXT:    vinsw v2, r3, 0
420; CHECK-BE-NEXT:    li r3, 1
421; CHECK-BE-NEXT:    rldic r3, r3, 38, 25
422; CHECK-BE-NEXT:    lwzx r3, r5, r3
423; CHECK-BE-NEXT:    vinsw v2, r3, 8
424; CHECK-BE-NEXT:    blr
425;
426; CHECK-P9-LABEL: testFloatImm3:
427; CHECK-P9:       # %bb.0: # %entry
428; CHECK-P9-NEXT:    lis r3, 4
429; CHECK-P9-NEXT:    lfsx f0, r5, r3
430; CHECK-P9-NEXT:    li r3, 1
431; CHECK-P9-NEXT:    rldic r3, r3, 38, 25
432; CHECK-P9-NEXT:    xscvdpspn vs0, f0
433; CHECK-P9-NEXT:    xxinsertw v2, vs0, 0
434; CHECK-P9-NEXT:    lfsx f0, r5, r3
435; CHECK-P9-NEXT:    xscvdpspn vs0, f0
436; CHECK-P9-NEXT:    xxinsertw v2, vs0, 8
437; CHECK-P9-NEXT:    blr
438entry:
439  %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 ; first source address: 65536 i32 elements (262144 bytes) past %b
440  %0 = bitcast i32* %add.ptr to float*
441  %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 68719476736 ; second source address: 2^36 i32 elements (2^38 bytes) past %b
442  %1 = bitcast i32* %add.ptr1 to float*
443  %2 = load float, float* %0, align 4
444  %vecins = insertelement <4 x float> %a, float %2, i32 0
445  %3 = load float, float* %1, align 4
446  %vecins2 = insertelement <4 x float> %vecins, float %3, i32 2
447  ret <4 x float> %vecins2
448}
449
450; Double indexed
451
; testDouble1: inserts the double argument %b into %a at the run-time i32
; index %idx1. The pwr10 runs expect the value to be moved to a GPR with
; mffprd, the index scaled by 8, and then vinsdrx (little-endian) or vinsdlx
; (big-endian); the pwr9 run expects a store of the vector to the stack, an
; stfdx into it, and a reload.
452define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) {
453; CHECK-LABEL: testDouble1:
454; CHECK:       # %bb.0: # %entry
455; CHECK-NEXT:    extsw r4, r6
456; CHECK-NEXT:    mffprd r3, f1
457; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
458; CHECK-NEXT:    vinsdrx v2, r4, r3
459; CHECK-NEXT:    blr
460;
461; CHECK-BE-LABEL: testDouble1:
462; CHECK-BE:       # %bb.0: # %entry
463; CHECK-BE-NEXT:    extsw r4, r6
464; CHECK-BE-NEXT:    mffprd r3, f1
465; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
466; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
467; CHECK-BE-NEXT:    blr
468;
469; CHECK-P9-LABEL: testDouble1:
470; CHECK-P9:       # %bb.0: # %entry
471; CHECK-P9-NEXT:    addi r4, r1, -16
472; CHECK-P9-NEXT:    rlwinm r3, r6, 3, 28, 28
473; CHECK-P9-NEXT:    stxv v2, -16(r1)
474; CHECK-P9-NEXT:    stfdx f1, r4, r3
475; CHECK-P9-NEXT:    lxv v2, -16(r1)
476; CHECK-P9-NEXT:    blr
477entry:
478  %vecins = insertelement <2 x double> %a, double %b, i32 %idx1 ; %idx1 is a function argument, not a constant
479  ret <2 x double> %vecins
480}
481
; testDouble2: loads two doubles, from %b and from %b + 1 byte, and inserts
; them into %a at the run-time indices %idx1 and %idx2 respectively. The pwr10
; runs expect ld for the first value and the prefixed pld (displacement 1) for
; the second, each followed by vinsdrx (little-endian) or vinsdlx
; (big-endian); the pwr9 run expects ld and li + ldx loads combined with two
; rounds of stack store / stdx / reload.
482define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
483; CHECK-LABEL: testDouble2:
484; CHECK:       # %bb.0: # %entry
485; CHECK-NEXT:    ld r3, 0(r5)
486; CHECK-NEXT:    extsw r4, r6
487; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
488; CHECK-NEXT:    vinsdrx v2, r4, r3
489; CHECK-NEXT:    pld r3, 1(r5), 0
490; CHECK-NEXT:    extsw r4, r7
491; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
492; CHECK-NEXT:    vinsdrx v2, r4, r3
493; CHECK-NEXT:    blr
494;
495; CHECK-BE-LABEL: testDouble2:
496; CHECK-BE:       # %bb.0: # %entry
497; CHECK-BE-NEXT:    ld r3, 0(r5)
498; CHECK-BE-NEXT:    extsw r4, r6
499; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
500; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
501; CHECK-BE-NEXT:    pld r3, 1(r5), 0
502; CHECK-BE-NEXT:    extsw r4, r7
503; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
504; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
505; CHECK-BE-NEXT:    blr
506;
507; CHECK-P9-LABEL: testDouble2:
508; CHECK-P9:       # %bb.0: # %entry
509; CHECK-P9-NEXT:    ld r3, 0(r5)
510; CHECK-P9-NEXT:    rlwinm r4, r6, 3, 28, 28
511; CHECK-P9-NEXT:    addi r6, r1, -32
512; CHECK-P9-NEXT:    stxv v2, -32(r1)
513; CHECK-P9-NEXT:    stdx r3, r6, r4
514; CHECK-P9-NEXT:    li r3, 1
515; CHECK-P9-NEXT:    rlwinm r4, r7, 3, 28, 28
516; CHECK-P9-NEXT:    lxv vs0, -32(r1)
517; CHECK-P9-NEXT:    ldx r3, r5, r3
518; CHECK-P9-NEXT:    addi r5, r1, -16
519; CHECK-P9-NEXT:    stxv vs0, -16(r1)
520; CHECK-P9-NEXT:    stdx r3, r5, r4
521; CHECK-P9-NEXT:    lxv v2, -16(r1)
522; CHECK-P9-NEXT:    blr
523entry:
524  %0 = bitcast i8* %b to double* ; first source address: %b itself
525  %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1 ; second source address: %b + 1 byte
526  %1 = bitcast i8* %add.ptr1 to double*
527  %2 = load double, double* %0, align 8
528  %vecins = insertelement <2 x double> %a, double %2, i32 %idx1
529  %3 = load double, double* %1, align 8 ; NOTE(review): %b and %b + 1 cannot both be 8-byte aligned; presumably the odd displacement is intentional - confirm
530  %vecins2 = insertelement <2 x double> %vecins, double %3, i32 %idx2
531  ret <2 x double> %vecins2
532}
533
; testDouble3: same shape as testDouble2 but with large byte offsets: the two
; doubles are loaded from %b + 65536 and %b + 68719476736 (2^36) and inserted
; at the run-time indices %idx1 and %idx2. The pwr10 runs expect pld with
; displacement 65536 for the first load and li + rldic + ldx for the second,
; each followed by vinsdrx (little-endian) or vinsdlx (big-endian); the pwr9
; run expects lis/ldx and li/rldic/ldx loads combined with the stack store /
; stdx / reload sequence.
534define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
535; CHECK-LABEL: testDouble3:
536; CHECK:       # %bb.0: # %entry
537; CHECK-NEXT:    pld r3, 65536(r5), 0
538; CHECK-NEXT:    extsw r4, r6
539; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
540; CHECK-NEXT:    vinsdrx v2, r4, r3
541; CHECK-NEXT:    li r3, 1
542; CHECK-NEXT:    extsw r4, r7
543; CHECK-NEXT:    rldic r3, r3, 36, 27
544; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
545; CHECK-NEXT:    ldx r3, r5, r3
546; CHECK-NEXT:    vinsdrx v2, r4, r3
547; CHECK-NEXT:    blr
548;
549; CHECK-BE-LABEL: testDouble3:
550; CHECK-BE:       # %bb.0: # %entry
551; CHECK-BE-NEXT:    pld r3, 65536(r5), 0
552; CHECK-BE-NEXT:    extsw r4, r6
553; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
554; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
555; CHECK-BE-NEXT:    li r3, 1
556; CHECK-BE-NEXT:    extsw r4, r7
557; CHECK-BE-NEXT:    rldic r3, r3, 36, 27
558; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
559; CHECK-BE-NEXT:    ldx r3, r5, r3
560; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
561; CHECK-BE-NEXT:    blr
562;
563; CHECK-P9-LABEL: testDouble3:
564; CHECK-P9:       # %bb.0: # %entry
565; CHECK-P9-NEXT:    lis r3, 1
566; CHECK-P9-NEXT:    rlwinm r4, r6, 3, 28, 28
567; CHECK-P9-NEXT:    addi r6, r1, -32
568; CHECK-P9-NEXT:    ldx r3, r5, r3
569; CHECK-P9-NEXT:    stxv v2, -32(r1)
570; CHECK-P9-NEXT:    stdx r3, r6, r4
571; CHECK-P9-NEXT:    li r3, 1
572; CHECK-P9-NEXT:    rlwinm r4, r7, 3, 28, 28
573; CHECK-P9-NEXT:    lxv vs0, -32(r1)
574; CHECK-P9-NEXT:    rldic r3, r3, 36, 27
575; CHECK-P9-NEXT:    ldx r3, r5, r3
576; CHECK-P9-NEXT:    addi r5, r1, -16
577; CHECK-P9-NEXT:    stxv vs0, -16(r1)
578; CHECK-P9-NEXT:    stdx r3, r5, r4
579; CHECK-P9-NEXT:    lxv v2, -16(r1)
580; CHECK-P9-NEXT:    blr
581entry:
582  %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536 ; first source address: %b + 65536 bytes
583  %0 = bitcast i8* %add.ptr to double*
584  %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 68719476736 ; second source address: %b + 2^36 bytes
585  %1 = bitcast i8* %add.ptr1 to double*
586  %2 = load double, double* %0, align 8
587  %vecins = insertelement <2 x double> %a, double %2, i32 %idx1
588  %3 = load double, double* %1, align 8
589  %vecins2 = insertelement <2 x double> %vecins, double %3, i32 %idx2
590  ret <2 x double> %vecins2
591}
592
593; Double immediate
594
; testDoubleImm1: inserts the double argument %b into %a at the constant
; element index 0. The expected code is a single permute in every run:
; xxmrghd for pwr10 little-endian, and xxpermdi with immediate 1 for both
; big-endian runs.
595define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
596; CHECK-LABEL: testDoubleImm1:
597; CHECK:       # %bb.0: # %entry
598; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
599; CHECK-NEXT:    xxmrghd v2, v2, vs1
600; CHECK-NEXT:    blr
601;
602; CHECK-BE-LABEL: testDoubleImm1:
603; CHECK-BE:       # %bb.0: # %entry
604; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
605; CHECK-BE-NEXT:    xxpermdi v2, vs1, v2, 1
606; CHECK-BE-NEXT:    blr
607;
608; CHECK-P9-LABEL: testDoubleImm1:
609; CHECK-P9:       # %bb.0: # %entry
610; CHECK-P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
611; CHECK-P9-NEXT:    xxpermdi v2, vs1, v2, 1
612; CHECK-P9-NEXT:    blr
613entry:
614  %vecins = insertelement <2 x double> %a, double %b, i32 0 ; constant index: element 0
615  ret <2 x double> %vecins
616}
617
; testDoubleImm2: loads a double through the i32 pointer %b (offset 0) and
; inserts it into %a at the constant element index 0. Every run expects lfd
; with displacement 0 followed by one permute: xxmrghd for pwr10
; little-endian, xxpermdi with immediate 1 for both big-endian runs.
618define <2 x double> @testDoubleImm2(<2 x double> %a, i32* %b) {
619; CHECK-LABEL: testDoubleImm2:
620; CHECK:       # %bb.0: # %entry
621; CHECK-NEXT:    lfd f0, 0(r5)
622; CHECK-NEXT:    xxmrghd v2, v2, vs0
623; CHECK-NEXT:    blr
624;
625; CHECK-BE-LABEL: testDoubleImm2:
626; CHECK-BE:       # %bb.0: # %entry
627; CHECK-BE-NEXT:    lfd f0, 0(r5)
628; CHECK-BE-NEXT:    xxpermdi v2, vs0, v2, 1
629; CHECK-BE-NEXT:    blr
630;
631; CHECK-P9-LABEL: testDoubleImm2:
632; CHECK-P9:       # %bb.0: # %entry
633; CHECK-P9-NEXT:    lfd f0, 0(r5)
634; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
635; CHECK-P9-NEXT:    blr
636entry:
637  %0 = bitcast i32* %b to double* ; reinterpret the i32 pointer as a double pointer
638  %1 = load double, double* %0, align 8
639  %vecins = insertelement <2 x double> %a, double %1, i32 0 ; constant index: element 0
640  ret <2 x double> %vecins
641}
642
; testDoubleImm3: loads a double from one i32 element past %b and inserts it
; into %a at the constant element index 0. Every run expects lfd with
; displacement 4 followed by one permute: xxmrghd for pwr10 little-endian,
; xxpermdi with immediate 1 for both big-endian runs.
643define <2 x double> @testDoubleImm3(<2 x double> %a, i32* %b) {
644; CHECK-LABEL: testDoubleImm3:
645; CHECK:       # %bb.0: # %entry
646; CHECK-NEXT:    lfd f0, 4(r5)
647; CHECK-NEXT:    xxmrghd v2, v2, vs0
648; CHECK-NEXT:    blr
649;
650; CHECK-BE-LABEL: testDoubleImm3:
651; CHECK-BE:       # %bb.0: # %entry
652; CHECK-BE-NEXT:    lfd f0, 4(r5)
653; CHECK-BE-NEXT:    xxpermdi v2, vs0, v2, 1
654; CHECK-BE-NEXT:    blr
655;
656; CHECK-P9-LABEL: testDoubleImm3:
657; CHECK-P9:       # %bb.0: # %entry
658; CHECK-P9-NEXT:    lfd f0, 4(r5)
659; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
660; CHECK-P9-NEXT:    blr
661entry:
662  %add.ptr = getelementptr inbounds i32, i32* %b, i64 1 ; source address: one i32 element (4 bytes) past %b
663  %0 = bitcast i32* %add.ptr to double*
664  %1 = load double, double* %0, align 8 ; NOTE(review): marked align 8 at a 4-byte offset from %b; presumably intentional - confirm
665  %vecins = insertelement <2 x double> %a, double %1, i32 0 ; constant index: element 0
666  ret <2 x double> %vecins
667}
668
; testDoubleImm4: loads a double from 65536 i32 elements past %b and inserts
; it into %a at the constant element index 0. The pwr10 runs expect the
; prefixed plfd with displacement 262144; the pwr9 run expects lis + lfdx
; instead. The load is followed by xxmrghd (pwr10 little-endian) or xxpermdi
; with immediate 1 (both big-endian runs).
669define <2 x double> @testDoubleImm4(<2 x double> %a, i32* %b) {
670; CHECK-LABEL: testDoubleImm4:
671; CHECK:       # %bb.0: # %entry
672; CHECK-NEXT:    plfd f0, 262144(r5), 0
673; CHECK-NEXT:    xxmrghd v2, v2, vs0
674; CHECK-NEXT:    blr
675;
676; CHECK-BE-LABEL: testDoubleImm4:
677; CHECK-BE:       # %bb.0: # %entry
678; CHECK-BE-NEXT:    plfd f0, 262144(r5), 0
679; CHECK-BE-NEXT:    xxpermdi v2, vs0, v2, 1
680; CHECK-BE-NEXT:    blr
681;
682; CHECK-P9-LABEL: testDoubleImm4:
683; CHECK-P9:       # %bb.0: # %entry
684; CHECK-P9-NEXT:    lis r3, 4
685; CHECK-P9-NEXT:    lfdx f0, r5, r3
686; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
687; CHECK-P9-NEXT:    blr
688entry:
689  %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536 ; source address: 65536 i32 elements (262144 bytes) past %b
690  %0 = bitcast i32* %add.ptr to double*
691  %1 = load double, double* %0, align 8
692  %vecins = insertelement <2 x double> %a, double %1, i32 0 ; constant index: element 0
693  ret <2 x double> %vecins
694}
695
; testDoubleImm5: loads a double from 68719476736 (2^36) i32 elements past %b
; and inserts it into %a at the constant element index 0. Every run expects
; the offset to be materialized with li + rldic (shift 38) and the value
; loaded with lfdx, followed by xxmrghd (pwr10 little-endian) or xxpermdi with
; immediate 1 (both big-endian runs).
696define <2 x double> @testDoubleImm5(<2 x double> %a, i32* %b) {
697; CHECK-LABEL: testDoubleImm5:
698; CHECK:       # %bb.0: # %entry
699; CHECK-NEXT:    li r3, 1
700; CHECK-NEXT:    rldic r3, r3, 38, 25
701; CHECK-NEXT:    lfdx f0, r5, r3
702; CHECK-NEXT:    xxmrghd v2, v2, vs0
703; CHECK-NEXT:    blr
704;
705; CHECK-BE-LABEL: testDoubleImm5:
706; CHECK-BE:       # %bb.0: # %entry
707; CHECK-BE-NEXT:    li r3, 1
708; CHECK-BE-NEXT:    rldic r3, r3, 38, 25
709; CHECK-BE-NEXT:    lfdx f0, r5, r3
710; CHECK-BE-NEXT:    xxpermdi v2, vs0, v2, 1
711; CHECK-BE-NEXT:    blr
712;
713; CHECK-P9-LABEL: testDoubleImm5:
714; CHECK-P9:       # %bb.0: # %entry
715; CHECK-P9-NEXT:    li r3, 1
716; CHECK-P9-NEXT:    rldic r3, r3, 38, 25
717; CHECK-P9-NEXT:    lfdx f0, r5, r3
718; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
719; CHECK-P9-NEXT:    blr
720entry:
721  %add.ptr = getelementptr inbounds i32, i32* %b, i64 68719476736 ; source address: 2^36 i32 elements (2^38 bytes) past %b
722  %0 = bitcast i32* %add.ptr to double*
723  %1 = load double, double* %0, align 8
724  %vecins = insertelement <2 x double> %a, double %1, i32 0 ; constant index: element 0
725  ret <2 x double> %vecins
726}
727
; testInsertDoubleToFloat: rounds the double argument %b to float with
; fptrunc and inserts the result into %a at the constant element index 1.
; Every run expects xscvdpsp followed by one xxinsertw; the immediate is 8 for
; pwr10 little-endian and 4 for both big-endian runs.
; NOTE(review): the signature references attribute group #0, whose definition
; is not visible in this part of the file - confirm it exists or is not needed.
728define dso_local <4 x float> @testInsertDoubleToFloat(<4 x float> %a, double %b) local_unnamed_addr #0 {
729; CHECK-LABEL: testInsertDoubleToFloat:
730; CHECK:       # %bb.0: # %entry
731; CHECK-NEXT:    xscvdpsp f0, f1
732; CHECK-NEXT:    xxinsertw v2, vs0, 8
733; CHECK-NEXT:    blr
734;
735; CHECK-BE-LABEL: testInsertDoubleToFloat:
736; CHECK-BE:       # %bb.0: # %entry
737; CHECK-BE-NEXT:    xscvdpsp f0, f1
738; CHECK-BE-NEXT:    xxinsertw v2, vs0, 4
739; CHECK-BE-NEXT:    blr
740;
741; CHECK-P9-LABEL: testInsertDoubleToFloat:
742; CHECK-P9:       # %bb.0: # %entry
743; CHECK-P9-NEXT:    xscvdpsp f0, f1
744; CHECK-P9-NEXT:    xxinsertw v2, vs0, 4
745; CHECK-P9-NEXT:    blr
746entry:
747  %conv = fptrunc double %b to float ; narrow the double argument to the element type
748  %vecins = insertelement <4 x float> %a, float %conv, i32 1 ; constant index: element 1
749  ret <4 x float> %vecins
750}
751