; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr10 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-BE
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:   -mcpu=pwr9 -ppc-asm-full-reg-names \
; RUN:   -ppc-vsr-nums-as-vr < %s | FileCheck %s --check-prefix=CHECK-P9
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \
; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc64-ibm-aix-xcoff < %s | \
; RUN: FileCheck %s --check-prefixes=AIX-P8,AIX-P8-64
; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr \
; RUN:   -ppc-asm-full-reg-names -mtriple=powerpc-ibm-aix-xcoff < %s | \
; RUN: FileCheck %s --check-prefixes=AIX-P8,AIX-P8-32

; Byte indexed
;
; testByte truncates %b to i8 and inserts it into %a at the run-time index
; %idx. The pwr10 runs expect a single vinsbrx (little-endian) or vinsblx
; (big-endian). The pwr9 and AIX pwr8 runs expect the vector to be stored to
; the stack, the byte stored at the masked index (stbx), and the vector
; reloaded.

define <16 x i8> @testByte(<16 x i8> %a, i64 %b, i64 %idx) {
; CHECK-LABEL: testByte:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vinsbrx v2, r6, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testByte:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vinsblx v2, r6, r5
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testByte:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    addi r4, r1, -16
; CHECK-P9-NEXT:    clrldi r3, r6, 60
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    stbx r5, r4, r3
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testByte:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    addi r5, r1, -16
; AIX-P8-64-NEXT:    clrldi r4, r4, 60
; AIX-P8-64-NEXT:    stxvw4x v2, 0, r5
; AIX-P8-64-NEXT:    stbx r3, r5, r4
; AIX-P8-64-NEXT:    lxvw4x v2, 0, r5
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testByte:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    addi r3, r1, -16
; AIX-P8-32-NEXT:    clrlwi r5, r6, 28
; AIX-P8-32-NEXT:    stxvw4x v2, 0, r3
; AIX-P8-32-NEXT:    stbx r4, r3, r5
; AIX-P8-32-NEXT:    lxvw4x v2, 0, r3
; AIX-P8-32-NEXT:    blr
entry:
  %conv = trunc i64 %b to i8
  %vecins = insertelement <16 x i8> %a, i8 %conv, i64 %idx
  ret <16 x i8> %vecins
}

; Halfword indexed
;
; testHalf truncates %b to i16 and inserts it into %a at the run-time index
; %idx. The pwr10 runs expect the index to be shifted left by one (slwi) and
; then a single vinshrx (little-endian) or vinshlx (big-endian). The pwr9 and
; AIX pwr8 runs expect a store-vector / sthx / reload-vector sequence through
; the stack.

define <8 x i16> @testHalf(<8 x i16> %a, i64 %b, i64 %idx) {
; CHECK-LABEL: testHalf:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    slwi r3, r6, 1
; CHECK-NEXT:    vinshrx v2, r3, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testHalf:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    slwi r3, r6, 1
; CHECK-BE-NEXT:    vinshlx v2, r3, r5
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testHalf:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    addi r4, r1, -16
; CHECK-P9-NEXT:    rlwinm r3, r6, 1, 28, 30
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    sthx r5, r4, r3
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testHalf:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    addi r5, r1, -16
; AIX-P8-64-NEXT:    rlwinm r4, r4, 1, 28, 30
; AIX-P8-64-NEXT:    stxvw4x v2, 0, r5
; AIX-P8-64-NEXT:    sthx r3, r5, r4
; AIX-P8-64-NEXT:    lxvw4x v2, 0, r5
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testHalf:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    addi r3, r1, -16
; AIX-P8-32-NEXT:    rlwinm r5, r6, 1, 28, 30
; AIX-P8-32-NEXT:    stxvw4x v2, 0, r3
; AIX-P8-32-NEXT:    sthx r4, r3, r5
; AIX-P8-32-NEXT:    lxvw4x v2, 0, r3
; AIX-P8-32-NEXT:    blr
entry:
  %conv = trunc i64 %b to i16
  %vecins = insertelement <8 x i16> %a, i16 %conv, i64 %idx
  ret <8 x i16> %vecins
}

; Word indexed
;
; testWord truncates %b to i32 and inserts it into %a at the run-time index
; %idx. The pwr10 runs expect the index to be shifted left by two (slwi) and
; then a single vinswrx (little-endian) or vinswlx (big-endian). The pwr9 and
; AIX pwr8 runs expect a store-vector / stwx / reload-vector sequence through
; the stack.

define <4 x i32> @testWord(<4 x i32> %a, i64 %b, i64 %idx) {
; CHECK-LABEL: testWord:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    slwi r3, r6, 2
; CHECK-NEXT:    vinswrx v2, r3, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testWord:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    slwi r3, r6, 2
; CHECK-BE-NEXT:    vinswlx v2, r3, r5
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testWord:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    addi r4, r1, -16
; CHECK-P9-NEXT:    rlwinm r3, r6, 2, 28, 29
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    stwx r5, r4, r3
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testWord:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    addi r5, r1, -16
; AIX-P8-64-NEXT:    rlwinm r4, r4, 2, 28, 29
; AIX-P8-64-NEXT:    stxvw4x v2, 0, r5
; AIX-P8-64-NEXT:    stwx r3, r5, r4
; AIX-P8-64-NEXT:    lxvw4x v2, 0, r5
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testWord:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    addi r3, r1, -16
; AIX-P8-32-NEXT:    rlwinm r5, r6, 2, 28, 29
; AIX-P8-32-NEXT:    stxvw4x v2, 0, r3
; AIX-P8-32-NEXT:    stwx r4, r3, r5
; AIX-P8-32-NEXT:    lxvw4x v2, 0, r3
; AIX-P8-32-NEXT:    blr
entry:
  %conv = trunc i64 %b to i32
  %vecins = insertelement <4 x i32> %a, i32 %conv, i64 %idx
  ret <4 x i32> %vecins
}

; Word immediate
;
; testWordImm truncates %b to i32 and inserts it into %a at the constant
; element indices 1 and 3. The pwr10 runs expect two vinsw instructions with
; immediate byte offsets 8 and 0 (little-endian) or 4 and 12 (big-endian).
; The pwr9 run expects mtfprwz followed by two xxinsertw. The AIX pwr8 runs
; expect two vperm instructions whose control vectors are loaded from the
; constants %const.0 and %const.1.

define <4 x i32> @testWordImm(<4 x i32> %a, i64 %b) {
; CHECK-LABEL: testWordImm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vinsw v2, r5, 8
; CHECK-NEXT:    vinsw v2, r5, 0
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testWordImm:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vinsw v2, r5, 4
; CHECK-BE-NEXT:    vinsw v2, r5, 12
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testWordImm:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    mtfprwz f0, r5
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 4
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 12
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testWordImm:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    ld r4, L..C0(r2) # %const.0
; AIX-P8-64-NEXT:    mtvsrwz v4, r3
; AIX-P8-64-NEXT:    ld r3, L..C1(r2) # %const.1
; AIX-P8-64-NEXT:    lxvw4x v3, 0, r4
; AIX-P8-64-NEXT:    vperm v2, v2, v4, v3
; AIX-P8-64-NEXT:    lxvw4x v3, 0, r3
; AIX-P8-64-NEXT:    vperm v2, v2, v4, v3
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testWordImm:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    lwz r3, L..C0(r2) # %const.0
; AIX-P8-32-NEXT:    stw r4, -16(r1)
; AIX-P8-32-NEXT:    addi r4, r1, -16
; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
; AIX-P8-32-NEXT:    lxvw4x v3, 0, r3
; AIX-P8-32-NEXT:    lwz r3, L..C1(r2) # %const.1
; AIX-P8-32-NEXT:    vperm v2, v2, v4, v3
; AIX-P8-32-NEXT:    lxvw4x v3, 0, r3
; AIX-P8-32-NEXT:    vperm v2, v2, v4, v3
; AIX-P8-32-NEXT:    blr
entry:
  %conv = trunc i64 %b to i32
  %vecins = insertelement <4 x i32> %a, i32 %conv, i32 1
  %vecins2 = insertelement <4 x i32> %vecins, i32 %conv, i32 3
  ret <4 x i32> %vecins2
}

; Doubleword indexed
;
; testDoubleword inserts the i64 %b into %a at the run-time index %idx. The
; pwr10 runs expect the index to be scaled with rlwinm (shift by 3) and then
; a single vinsdrx (little-endian) or vinsdlx (big-endian). The pwr9 and
; 64-bit AIX pwr8 runs expect one store-vector / stdx / reload-vector
; sequence. The 32-bit AIX pwr8 run expects two such stack round trips, each
; storing one word of %b with stwx (from r3, then r4).

define <2 x i64> @testDoubleword(<2 x i64> %a, i64 %b, i64 %idx) {
; CHECK-LABEL: testDoubleword:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    rlwinm r3, r6, 3, 0, 28
; CHECK-NEXT:    vinsdrx v2, r3, r5
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDoubleword:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    rlwinm r3, r6, 3, 0, 28
; CHECK-BE-NEXT:    vinsdlx v2, r3, r5
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDoubleword:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    addi r4, r1, -16
; CHECK-P9-NEXT:    rlwinm r3, r6, 3, 28, 28
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    stdx r5, r4, r3
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testDoubleword:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    addi r5, r1, -16
; AIX-P8-64-NEXT:    rlwinm r4, r4, 3, 28, 28
; AIX-P8-64-NEXT:    stxvd2x v2, 0, r5
; AIX-P8-64-NEXT:    stdx r3, r5, r4
; AIX-P8-64-NEXT:    lxvd2x v2, 0, r5
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testDoubleword:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    add r6, r6, r6
; AIX-P8-32-NEXT:    addi r5, r1, -32
; AIX-P8-32-NEXT:    rlwinm r7, r6, 2, 28, 29
; AIX-P8-32-NEXT:    stxvw4x v2, 0, r5
; AIX-P8-32-NEXT:    stwx r3, r5, r7
; AIX-P8-32-NEXT:    addi r3, r6, 1
; AIX-P8-32-NEXT:    lxvw4x vs0, 0, r5
; AIX-P8-32-NEXT:    addi r5, r1, -16
; AIX-P8-32-NEXT:    rlwinm r3, r3, 2, 28, 29
; AIX-P8-32-NEXT:    stxvw4x vs0, 0, r5
; AIX-P8-32-NEXT:    stwx r4, r5, r3
; AIX-P8-32-NEXT:    lxvw4x v2, 0, r5
; AIX-P8-32-NEXT:    blr
entry:
  %vecins = insertelement <2 x i64> %a, i64 %b, i64 %idx
  ret <2 x i64> %vecins
}

; Doubleword immediate
;
; testDoublewordImm inserts the i64 %b into %a at the constant element
; index 1. The pwr10 runs expect a single vinsd with immediate byte offset 0
; (little-endian) or 8 (big-endian). The pwr9 and 64-bit AIX pwr8 runs expect
; mtfprd followed by xxmrghd. The 32-bit AIX pwr8 run expects both words of
; %b to be stored to the stack and merged in with two vperm instructions.

define <2 x i64> @testDoublewordImm(<2 x i64> %a, i64 %b) {
; CHECK-LABEL: testDoublewordImm:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vinsd v2, r5, 0
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDoublewordImm:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vinsd v2, r5, 8
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDoublewordImm:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    mtfprd f0, r5
; CHECK-P9-NEXT:    xxmrghd v2, v2, vs0
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testDoublewordImm:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    mtfprd f0, r3
; AIX-P8-64-NEXT:    xxmrghd v2, v2, vs0
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testDoublewordImm:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    lwz r5, L..C2(r2) # %const.0
; AIX-P8-32-NEXT:    stw r3, -16(r1)
; AIX-P8-32-NEXT:    stw r4, -32(r1)
; AIX-P8-32-NEXT:    addi r3, r1, -16
; AIX-P8-32-NEXT:    addi r4, r1, -32
; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
; AIX-P8-32-NEXT:    lwz r3, L..C3(r2) # %const.1
; AIX-P8-32-NEXT:    lxvw4x v3, 0, r5
; AIX-P8-32-NEXT:    vperm v2, v2, v4, v3
; AIX-P8-32-NEXT:    lxvw4x v3, 0, r3
; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
; AIX-P8-32-NEXT:    vperm v2, v2, v4, v3
; AIX-P8-32-NEXT:    blr
entry:
  %vecins = insertelement <2 x i64> %a, i64 %b, i32 1
  ret <2 x i64> %vecins
}

; testDoublewordImm2 inserts the i64 %b into %a at the constant element
; index 0. The pwr10 runs expect a single vinsd with immediate byte offset 8
; (little-endian) or 0 (big-endian). The pwr9 and 64-bit AIX pwr8 runs expect
; mtfprd followed by xxpermdi. The 32-bit AIX pwr8 run expects both words of
; %b to be stored to the stack and merged in with two vperm instructions.
define <2 x i64> @testDoublewordImm2(<2 x i64> %a, i64 %b) {
; CHECK-LABEL: testDoublewordImm2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vinsd v2, r5, 8
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDoublewordImm2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    vinsd v2, r5, 0
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDoublewordImm2:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    mtfprd f0, r5
; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testDoublewordImm2:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    mtfprd f0, r3
; AIX-P8-64-NEXT:    xxpermdi v2, vs0, v2, 1
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testDoublewordImm2:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    lwz r5, L..C4(r2) # %const.0
; AIX-P8-32-NEXT:    stw r3, -16(r1)
; AIX-P8-32-NEXT:    stw r4, -32(r1)
; AIX-P8-32-NEXT:    addi r3, r1, -16
; AIX-P8-32-NEXT:    addi r4, r1, -32
; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
; AIX-P8-32-NEXT:    lwz r3, L..C5(r2) # %const.1
; AIX-P8-32-NEXT:    lxvw4x v3, 0, r5
; AIX-P8-32-NEXT:    vperm v2, v4, v2, v3
; AIX-P8-32-NEXT:    lxvw4x v3, 0, r3
; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
; AIX-P8-32-NEXT:    vperm v2, v2, v4, v3
; AIX-P8-32-NEXT:    blr
entry:
  %vecins = insertelement <2 x i64> %a, i64 %b, i32 0
  ret <2 x i64> %vecins
}

; Float indexed
;
; testFloat1 inserts the float %b into %a at the run-time i32 index %idx1.
; The pwr10 runs expect xscvdpspn on the value, extsw plus slwi on the index,
; and then vinswvrx (little-endian) or vinswvlx (big-endian). The pwr9 and
; AIX pwr8 runs expect a store-vector / stfsx / reload-vector sequence
; through the stack; the two AIX runs share one set of expectations here.

define <4 x float> @testFloat1(<4 x float> %a, float %b, i32 zeroext %idx1) {
; CHECK-LABEL: testFloat1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xscvdpspn v3, f1
; CHECK-NEXT:    extsw r3, r6
; CHECK-NEXT:    slwi r3, r3, 2
; CHECK-NEXT:    vinswvrx v2, r3, v3
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testFloat1:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xscvdpspn v3, f1
; CHECK-BE-NEXT:    extsw r3, r6
; CHECK-BE-NEXT:    slwi r3, r3, 2
; CHECK-BE-NEXT:    vinswvlx v2, r3, v3
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testFloat1:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    addi r4, r1, -16
; CHECK-P9-NEXT:    rlwinm r3, r6, 2, 28, 29
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    stfsx f1, r4, r3
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
;
; AIX-P8-LABEL: testFloat1:
; AIX-P8:       # %bb.0: # %entry
; AIX-P8-NEXT:    addi r3, r1, -16
; AIX-P8-NEXT:    rlwinm r4, r4, 2, 28, 29
; AIX-P8-NEXT:    stxvw4x v2, 0, r3
; AIX-P8-NEXT:    stfsx f1, r3, r4
; AIX-P8-NEXT:    lxvw4x v2, 0, r3
; AIX-P8-NEXT:    blr
entry:
  %vecins = insertelement <4 x float> %a, float %b, i32 %idx1
  ret <4 x float> %vecins
}

; testFloat2 loads one float from %b and one from byte offset 1 past %b, then
; inserts them into %a at the run-time indices %idx1 and %idx2. The pwr10
; runs expect each value to be loaded into a GPR with lwz and inserted with
; vinswrx (little-endian) or vinswlx (big-endian). The pwr9 and AIX pwr8 runs
; expect two store-vector / stwx / reload-vector round trips through the
; stack.
define <4 x float> @testFloat2(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-LABEL: testFloat2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lwz r3, 0(r5)
; CHECK-NEXT:    extsw r4, r6
; CHECK-NEXT:    slwi r4, r4, 2
; CHECK-NEXT:    vinswrx v2, r4, r3
; CHECK-NEXT:    lwz r3, 1(r5)
; CHECK-NEXT:    extsw r4, r7
; CHECK-NEXT:    slwi r4, r4, 2
; CHECK-NEXT:    vinswrx v2, r4, r3
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testFloat2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lwz r3, 0(r5)
; CHECK-BE-NEXT:    extsw r4, r6
; CHECK-BE-NEXT:    slwi r4, r4, 2
; CHECK-BE-NEXT:    vinswlx v2, r4, r3
; CHECK-BE-NEXT:    lwz r3, 1(r5)
; CHECK-BE-NEXT:    extsw r4, r7
; CHECK-BE-NEXT:    slwi r4, r4, 2
; CHECK-BE-NEXT:    vinswlx v2, r4, r3
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testFloat2:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lwz r3, 0(r5)
; CHECK-P9-NEXT:    rlwinm r4, r6, 2, 28, 29
; CHECK-P9-NEXT:    addi r6, r1, -16
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    stwx r3, r6, r4
; CHECK-P9-NEXT:    rlwinm r4, r7, 2, 28, 29
; CHECK-P9-NEXT:    lxv vs0, -16(r1)
; CHECK-P9-NEXT:    lwz r3, 1(r5)
; CHECK-P9-NEXT:    addi r5, r1, -32
; CHECK-P9-NEXT:    stxv vs0, -32(r1)
; CHECK-P9-NEXT:    stwx r3, r5, r4
; CHECK-P9-NEXT:    lxv v2, -32(r1)
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testFloat2:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    lwz r7, 0(r3)
; AIX-P8-64-NEXT:    addi r6, r1, -32
; AIX-P8-64-NEXT:    rlwinm r4, r4, 2, 28, 29
; AIX-P8-64-NEXT:    rlwinm r5, r5, 2, 28, 29
; AIX-P8-64-NEXT:    stxvw4x v2, 0, r6
; AIX-P8-64-NEXT:    stwx r7, r6, r4
; AIX-P8-64-NEXT:    addi r4, r1, -16
; AIX-P8-64-NEXT:    lxvw4x vs0, 0, r6
; AIX-P8-64-NEXT:    lwz r3, 1(r3)
; AIX-P8-64-NEXT:    stxvw4x vs0, 0, r4
; AIX-P8-64-NEXT:    stwx r3, r4, r5
; AIX-P8-64-NEXT:    lxvw4x v2, 0, r4
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testFloat2:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    lwz r7, 0(r3)
; AIX-P8-32-NEXT:    addi r6, r1, -32
; AIX-P8-32-NEXT:    rlwinm r4, r4, 2, 28, 29
; AIX-P8-32-NEXT:    stxvw4x v2, 0, r6
; AIX-P8-32-NEXT:    stwx r7, r6, r4
; AIX-P8-32-NEXT:    rlwinm r4, r5, 2, 28, 29
; AIX-P8-32-NEXT:    addi r5, r1, -16
; AIX-P8-32-NEXT:    lxvw4x vs0, 0, r6
; AIX-P8-32-NEXT:    lwz r3, 1(r3)
; AIX-P8-32-NEXT:    stxvw4x vs0, 0, r5
; AIX-P8-32-NEXT:    stwx r3, r5, r4
; AIX-P8-32-NEXT:    lxvw4x v2, 0, r5
; AIX-P8-32-NEXT:    blr
entry:
  %0 = bitcast i8* %b to float*
  %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1
  %1 = bitcast i8* %add.ptr1 to float*
  %2 = load float, float* %0, align 4
  %vecins = insertelement <4 x float> %a, float %2, i32 %idx1
  %3 = load float, float* %1, align 4
  %vecins2 = insertelement <4 x float> %vecins, float %3, i32 %idx2
  ret <4 x float> %vecins2
}

; testFloat3 loads floats from byte offsets 65536 and 68719476736 past %b and
; inserts them into %a at the run-time indices %idx1 and %idx2. The pwr10
; runs expect the first load as plwz with displacement 65536 and the second
; as lwzx with the offset built by li + rldic, each followed by vinswrx
; (little-endian) or vinswlx (big-endian). The pwr9 and AIX pwr8 runs expect
; two stack round trips. In the 32-bit AIX run the second load is expected at
; displacement 0 (presumably because the 64-bit offset wraps to 0 with 32-bit
; pointers; verify against the target data layout).
define <4 x float> @testFloat3(<4 x float> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-LABEL: testFloat3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    plwz r3, 65536(r5), 0
; CHECK-NEXT:    extsw r4, r6
; CHECK-NEXT:    slwi r4, r4, 2
; CHECK-NEXT:    vinswrx v2, r4, r3
; CHECK-NEXT:    li r3, 1
; CHECK-NEXT:    extsw r4, r7
; CHECK-NEXT:    rldic r3, r3, 36, 27
; CHECK-NEXT:    slwi r4, r4, 2
; CHECK-NEXT:    lwzx r3, r5, r3
; CHECK-NEXT:    vinswrx v2, r4, r3
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testFloat3:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    plwz r3, 65536(r5), 0
; CHECK-BE-NEXT:    extsw r4, r6
; CHECK-BE-NEXT:    slwi r4, r4, 2
; CHECK-BE-NEXT:    vinswlx v2, r4, r3
; CHECK-BE-NEXT:    li r3, 1
; CHECK-BE-NEXT:    extsw r4, r7
; CHECK-BE-NEXT:    rldic r3, r3, 36, 27
; CHECK-BE-NEXT:    slwi r4, r4, 2
; CHECK-BE-NEXT:    lwzx r3, r5, r3
; CHECK-BE-NEXT:    vinswlx v2, r4, r3
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testFloat3:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lis r3, 1
; CHECK-P9-NEXT:    rlwinm r4, r6, 2, 28, 29
; CHECK-P9-NEXT:    addi r6, r1, -16
; CHECK-P9-NEXT:    lwzx r3, r5, r3
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    stwx r3, r6, r4
; CHECK-P9-NEXT:    li r3, 1
; CHECK-P9-NEXT:    rlwinm r4, r7, 2, 28, 29
; CHECK-P9-NEXT:    lxv vs0, -16(r1)
; CHECK-P9-NEXT:    rldic r3, r3, 36, 27
; CHECK-P9-NEXT:    lwzx r3, r5, r3
; CHECK-P9-NEXT:    addi r5, r1, -32
; CHECK-P9-NEXT:    stxv vs0, -32(r1)
; CHECK-P9-NEXT:    stwx r3, r5, r4
; CHECK-P9-NEXT:    lxv v2, -32(r1)
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testFloat3:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    lis r6, 1
; AIX-P8-64-NEXT:    addi r7, r1, -32
; AIX-P8-64-NEXT:    rlwinm r4, r4, 2, 28, 29
; AIX-P8-64-NEXT:    rlwinm r5, r5, 2, 28, 29
; AIX-P8-64-NEXT:    lwzx r6, r3, r6
; AIX-P8-64-NEXT:    stxvw4x v2, 0, r7
; AIX-P8-64-NEXT:    stwx r6, r7, r4
; AIX-P8-64-NEXT:    li r4, 1
; AIX-P8-64-NEXT:    lxvw4x vs0, 0, r7
; AIX-P8-64-NEXT:    rldic r4, r4, 36, 27
; AIX-P8-64-NEXT:    lwzx r3, r3, r4
; AIX-P8-64-NEXT:    addi r4, r1, -16
; AIX-P8-64-NEXT:    stxvw4x vs0, 0, r4
; AIX-P8-64-NEXT:    stwx r3, r4, r5
; AIX-P8-64-NEXT:    lxvw4x v2, 0, r4
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testFloat3:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    lis r6, 1
; AIX-P8-32-NEXT:    rlwinm r4, r4, 2, 28, 29
; AIX-P8-32-NEXT:    addi r7, r1, -32
; AIX-P8-32-NEXT:    lwzx r6, r3, r6
; AIX-P8-32-NEXT:    stxvw4x v2, 0, r7
; AIX-P8-32-NEXT:    stwx r6, r7, r4
; AIX-P8-32-NEXT:    rlwinm r4, r5, 2, 28, 29
; AIX-P8-32-NEXT:    addi r5, r1, -16
; AIX-P8-32-NEXT:    lxvw4x vs0, 0, r7
; AIX-P8-32-NEXT:    lwz r3, 0(r3)
; AIX-P8-32-NEXT:    stxvw4x vs0, 0, r5
; AIX-P8-32-NEXT:    stwx r3, r5, r4
; AIX-P8-32-NEXT:    lxvw4x v2, 0, r5
; AIX-P8-32-NEXT:    blr
entry:
  %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
  %0 = bitcast i8* %add.ptr to float*
  %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 68719476736
  %1 = bitcast i8* %add.ptr1 to float*
  %2 = load float, float* %0, align 4
  %vecins = insertelement <4 x float> %a, float %2, i32 %idx1
  %3 = load float, float* %1, align 4
  %vecins2 = insertelement <4 x float> %vecins, float %3, i32 %idx2
  ret <4 x float> %vecins2
}

; Float immediate
;
; testFloatImm1 inserts the float %b into %a at the constant element indices
; 0 and 2. The pwr10 and pwr9 runs expect xscvdpspn followed by two xxinsertw
; instructions, with immediate byte offsets 12 and 4 on little-endian and 0
; and 8 on big-endian. The AIX pwr8 runs expect xscvdpspn followed by two
; vperm instructions whose control vectors are loaded from constants.

define <4 x float> @testFloatImm1(<4 x float> %a, float %b) {
; CHECK-LABEL: testFloatImm1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    xscvdpspn vs0, f1
; CHECK-NEXT:    xxinsertw v2, vs0, 12
; CHECK-NEXT:    xxinsertw v2, vs0, 4
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testFloatImm1:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    xscvdpspn vs0, f1
; CHECK-BE-NEXT:    xxinsertw v2, vs0, 0
; CHECK-BE-NEXT:    xxinsertw v2, vs0, 8
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testFloatImm1:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    xscvdpspn vs0, f1
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 0
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 8
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testFloatImm1:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    ld r3, L..C2(r2) # %const.0
; AIX-P8-64-NEXT:    xscvdpspn v3, f1
; AIX-P8-64-NEXT:    lxvw4x v4, 0, r3
; AIX-P8-64-NEXT:    ld r3, L..C3(r2) # %const.1
; AIX-P8-64-NEXT:    vperm v2, v3, v2, v4
; AIX-P8-64-NEXT:    lxvw4x v4, 0, r3
; AIX-P8-64-NEXT:    vperm v2, v2, v3, v4
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testFloatImm1:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    lwz r3, L..C6(r2) # %const.0
; AIX-P8-32-NEXT:    xscvdpspn v3, f1
; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
; AIX-P8-32-NEXT:    lwz r3, L..C7(r2) # %const.1
; AIX-P8-32-NEXT:    vperm v2, v3, v2, v4
; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
; AIX-P8-32-NEXT:    vperm v2, v2, v3, v4
; AIX-P8-32-NEXT:    blr
entry:
  %vecins = insertelement <4 x float> %a, float %b, i32 0
  %vecins1 = insertelement <4 x float> %vecins, float %b, i32 2
  ret <4 x float> %vecins1
}

; testFloatImm2 loads floats from i32 elements 0 and 1 of %b and inserts them
; into %a at the constant element indices 0 and 2. The pwr10 runs expect lwz
; at displacements 0 and 4, each followed by vinsw. The pwr9 run expects
; lwz + mtfprwz + xxinsertw per element. The 64-bit AIX pwr8 run expects
; lxsiwzx loads and the 32-bit AIX pwr8 run expects lfs + xscvdpspn; both
; merge with two vperm instructions.
define <4 x float> @testFloatImm2(<4 x float> %a, i32* %b) {
; CHECK-LABEL: testFloatImm2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    lwz r3, 0(r5)
; CHECK-NEXT:    vinsw v2, r3, 12
; CHECK-NEXT:    lwz r3, 4(r5)
; CHECK-NEXT:    vinsw v2, r3, 4
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testFloatImm2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lwz r3, 0(r5)
; CHECK-BE-NEXT:    vinsw v2, r3, 0
; CHECK-BE-NEXT:    lwz r3, 4(r5)
; CHECK-BE-NEXT:    vinsw v2, r3, 8
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testFloatImm2:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lwz r3, 0(r5)
; CHECK-P9-NEXT:    mtfprwz f0, r3
; CHECK-P9-NEXT:    lwz r3, 4(r5)
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 0
; CHECK-P9-NEXT:    mtfprwz f0, r3
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 8
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testFloatImm2:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    ld r4, L..C4(r2) # %const.0
; AIX-P8-64-NEXT:    lxsiwzx v3, 0, r3
; AIX-P8-64-NEXT:    li r5, 4
; AIX-P8-64-NEXT:    lxvw4x v4, 0, r4
; AIX-P8-64-NEXT:    ld r4, L..C5(r2) # %const.1
; AIX-P8-64-NEXT:    vperm v2, v3, v2, v4
; AIX-P8-64-NEXT:    lxsiwzx v3, r3, r5
; AIX-P8-64-NEXT:    lxvw4x v4, 0, r4
; AIX-P8-64-NEXT:    vperm v2, v2, v3, v4
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testFloatImm2:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    lfs f0, 0(r3)
; AIX-P8-32-NEXT:    lwz r4, L..C8(r2) # %const.0
; AIX-P8-32-NEXT:    xscvdpspn v3, f0
; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
; AIX-P8-32-NEXT:    lfs f0, 4(r3)
; AIX-P8-32-NEXT:    lwz r3, L..C9(r2) # %const.1
; AIX-P8-32-NEXT:    vperm v2, v3, v2, v4
; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
; AIX-P8-32-NEXT:    xscvdpspn v3, f0
; AIX-P8-32-NEXT:    vperm v2, v2, v3, v4
; AIX-P8-32-NEXT:    blr
entry:
  %0 = bitcast i32* %b to float*
  %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 1
  %1 = bitcast i32* %add.ptr1 to float*
  %2 = load float, float* %0, align 4
  %vecins = insertelement <4 x float> %a, float %2, i32 0
  %3 = load float, float* %1, align 4
  %vecins2 = insertelement <4 x float> %vecins, float %3, i32 2
  ret <4 x float> %vecins2
}

; testFloatImm3 loads floats from i32 elements 65536 and 68719476736 of %b
; and inserts them into %a at the constant element indices 0 and 2. The pwr10
; runs expect the first load as plwz with displacement 262144 and the second
; as lwzx with the offset built by li + rldic, each followed by vinsw. The
; pwr9 run expects lis/lwzx and li/rldic/lwzx loads feeding mtfprwz +
; xxinsertw. The AIX pwr8 runs expect two vperm merges. In the 32-bit AIX run
; the second load is expected at displacement 0 (presumably because the byte
; offset wraps to 0 with 32-bit pointers; verify against the target data
; layout).
define <4 x float> @testFloatImm3(<4 x float> %a, i32* %b) {
; CHECK-LABEL: testFloatImm3:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    plwz r3, 262144(r5), 0
; CHECK-NEXT:    vinsw v2, r3, 12
; CHECK-NEXT:    li r3, 1
; CHECK-NEXT:    rldic r3, r3, 38, 25
; CHECK-NEXT:    lwzx r3, r5, r3
; CHECK-NEXT:    vinsw v2, r3, 4
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testFloatImm3:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    plwz r3, 262144(r5), 0
; CHECK-BE-NEXT:    vinsw v2, r3, 0
; CHECK-BE-NEXT:    li r3, 1
; CHECK-BE-NEXT:    rldic r3, r3, 38, 25
; CHECK-BE-NEXT:    lwzx r3, r5, r3
; CHECK-BE-NEXT:    vinsw v2, r3, 8
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testFloatImm3:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lis r3, 4
; CHECK-P9-NEXT:    lwzx r3, r5, r3
; CHECK-P9-NEXT:    mtfprwz f0, r3
; CHECK-P9-NEXT:    li r3, 1
; CHECK-P9-NEXT:    rldic r3, r3, 38, 25
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 0
; CHECK-P9-NEXT:    lwzx r3, r5, r3
; CHECK-P9-NEXT:    mtfprwz f0, r3
; CHECK-P9-NEXT:    xxinsertw v2, vs0, 8
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testFloatImm3:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    ld r4, L..C6(r2) # %const.0
; AIX-P8-64-NEXT:    lis r5, 4
; AIX-P8-64-NEXT:    lxsiwzx v3, r3, r5
; AIX-P8-64-NEXT:    li r5, 1
; AIX-P8-64-NEXT:    rldic r5, r5, 38, 25
; AIX-P8-64-NEXT:    lxvw4x v4, 0, r4
; AIX-P8-64-NEXT:    ld r4, L..C7(r2) # %const.1
; AIX-P8-64-NEXT:    vperm v2, v3, v2, v4
; AIX-P8-64-NEXT:    lxsiwzx v3, r3, r5
; AIX-P8-64-NEXT:    lxvw4x v4, 0, r4
; AIX-P8-64-NEXT:    vperm v2, v2, v3, v4
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testFloatImm3:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    lis r4, 4
; AIX-P8-32-NEXT:    lfsx f0, r3, r4
; AIX-P8-32-NEXT:    lwz r4, L..C10(r2) # %const.0
; AIX-P8-32-NEXT:    xscvdpspn v3, f0
; AIX-P8-32-NEXT:    lxvw4x v4, 0, r4
; AIX-P8-32-NEXT:    lfs f0, 0(r3)
; AIX-P8-32-NEXT:    lwz r3, L..C11(r2) # %const.1
; AIX-P8-32-NEXT:    vperm v2, v3, v2, v4
; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
; AIX-P8-32-NEXT:    xscvdpspn v3, f0
; AIX-P8-32-NEXT:    vperm v2, v2, v3, v4
; AIX-P8-32-NEXT:    blr
entry:
  %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536
  %0 = bitcast i32* %add.ptr to float*
  %add.ptr1 = getelementptr inbounds i32, i32* %b, i64 68719476736
  %1 = bitcast i32* %add.ptr1 to float*
  %2 = load float, float* %0, align 4
  %vecins = insertelement <4 x float> %a, float %2, i32 0
  %3 = load float, float* %1, align 4
  %vecins2 = insertelement <4 x float> %vecins, float %3, i32 2
  ret <4 x float> %vecins2
}

; Double indexed
;
; testDouble1 inserts the double %b into %a at the run-time i32 index %idx1.
; The pwr10 runs expect the value to be moved to a GPR with mffprd, the index
; to be processed with extsw + rlwinm, and then vinsdrx (little-endian) or
; vinsdlx (big-endian). The pwr9 and AIX pwr8 runs expect a store-vector /
; stfdx / reload-vector sequence through the stack.

define <2 x double> @testDouble1(<2 x double> %a, double %b, i32 zeroext %idx1) {
; CHECK-LABEL: testDouble1:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    extsw r4, r6
; CHECK-NEXT:    mffprd r3, f1
; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-NEXT:    vinsdrx v2, r4, r3
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDouble1:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    extsw r4, r6
; CHECK-BE-NEXT:    mffprd r3, f1
; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDouble1:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    addi r4, r1, -16
; CHECK-P9-NEXT:    rlwinm r3, r6, 3, 28, 28
; CHECK-P9-NEXT:    stxv v2, -16(r1)
; CHECK-P9-NEXT:    stfdx f1, r4, r3
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testDouble1:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    addi r3, r1, -16
; AIX-P8-64-NEXT:    rlwinm r4, r4, 3, 28, 28
; AIX-P8-64-NEXT:    stxvd2x v2, 0, r3
; AIX-P8-64-NEXT:    stfdx f1, r3, r4
; AIX-P8-64-NEXT:    lxvd2x v2, 0, r3
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testDouble1:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    addi r3, r1, -16
; AIX-P8-32-NEXT:    rlwinm r4, r5, 3, 28, 28
; AIX-P8-32-NEXT:    stxvd2x v2, 0, r3
; AIX-P8-32-NEXT:    stfdx f1, r3, r4
; AIX-P8-32-NEXT:    lxvd2x v2, 0, r3
; AIX-P8-32-NEXT:    blr
entry:
  %vecins = insertelement <2 x double> %a, double %b, i32 %idx1
  ret <2 x double> %vecins
}

; testDouble2 loads one double from %b and one from byte offset 1 past %b,
; then inserts them into %a at the run-time indices %idx1 and %idx2. The
; pwr10 runs expect ld for the first value and pld with displacement 1 for
; the second, each followed by vinsdrx (little-endian) or vinsdlx
; (big-endian). The pwr9 and 64-bit AIX pwr8 runs expect ld and li + ldx
; loads with two stdx stack round trips. The 32-bit AIX pwr8 run expects lfd
; loads with two stfdx stack round trips.
define <2 x double> @testDouble2(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
; CHECK-LABEL: testDouble2:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    ld r3, 0(r5)
; CHECK-NEXT:    extsw r4, r6
; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-NEXT:    vinsdrx v2, r4, r3
; CHECK-NEXT:    pld r3, 1(r5), 0
; CHECK-NEXT:    extsw r4, r7
; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-NEXT:    vinsdrx v2, r4, r3
; CHECK-NEXT:    blr
;
; CHECK-BE-LABEL: testDouble2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    ld r3, 0(r5)
; CHECK-BE-NEXT:    extsw r4, r6
; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
; CHECK-BE-NEXT:    pld r3, 1(r5), 0
; CHECK-BE-NEXT:    extsw r4, r7
; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
; CHECK-BE-NEXT:    blr
;
; CHECK-P9-LABEL: testDouble2:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    ld r3, 0(r5)
; CHECK-P9-NEXT:    rlwinm r4, r6, 3, 28, 28
; CHECK-P9-NEXT:    addi r6, r1, -32
; CHECK-P9-NEXT:    stxv v2, -32(r1)
; CHECK-P9-NEXT:    stdx r3, r6, r4
; CHECK-P9-NEXT:    li r3, 1
; CHECK-P9-NEXT:    rlwinm r4, r7, 3, 28, 28
; CHECK-P9-NEXT:    lxv vs0, -32(r1)
; CHECK-P9-NEXT:    ldx r3, r5, r3
; CHECK-P9-NEXT:    addi r5, r1, -16
; CHECK-P9-NEXT:    stxv vs0, -16(r1)
; CHECK-P9-NEXT:    stdx r3, r5, r4
; CHECK-P9-NEXT:    lxv v2, -16(r1)
; CHECK-P9-NEXT:    blr
;
; AIX-P8-64-LABEL: testDouble2:
; AIX-P8-64:       # %bb.0: # %entry
; AIX-P8-64-NEXT:    ld r7, 0(r3)
; AIX-P8-64-NEXT:    addi r6, r1, -32
; AIX-P8-64-NEXT:    rlwinm r4, r4, 3, 28, 28
; AIX-P8-64-NEXT:    rlwinm r5, r5, 3, 28, 28
; AIX-P8-64-NEXT:    stxvd2x v2, 0, r6
; AIX-P8-64-NEXT:    stdx r7, r6, r4
; AIX-P8-64-NEXT:    li r4, 1
; AIX-P8-64-NEXT:    lxvd2x vs0, 0, r6
; AIX-P8-64-NEXT:    ldx r3, r3, r4
; AIX-P8-64-NEXT:    addi r4, r1, -16
; AIX-P8-64-NEXT:    stxvd2x vs0, 0, r4
; AIX-P8-64-NEXT:    stdx r3, r4, r5
; AIX-P8-64-NEXT:    lxvd2x v2, 0, r4
; AIX-P8-64-NEXT:    blr
;
; AIX-P8-32-LABEL: testDouble2:
; AIX-P8-32:       # %bb.0: # %entry
; AIX-P8-32-NEXT:    lfd f0, 0(r3)
; AIX-P8-32-NEXT:    addi r6, r1, -32
; AIX-P8-32-NEXT:    rlwinm r4, r4, 3, 28, 28
; AIX-P8-32-NEXT:    stxvd2x v2, 0, r6
; AIX-P8-32-NEXT:    stfdx f0, r6, r4
; AIX-P8-32-NEXT:    addi r4, r1, -16
; AIX-P8-32-NEXT:    lxvd2x vs0, 0, r6
; AIX-P8-32-NEXT:    lfd f1, 1(r3)
; AIX-P8-32-NEXT:    rlwinm r3, r5, 3, 28, 28
; AIX-P8-32-NEXT:    stxvd2x vs0, 0, r4
; AIX-P8-32-NEXT:    stfdx f1, r4, r3
; AIX-P8-32-NEXT:    lxvd2x v2, 0, r4
; AIX-P8-32-NEXT:    blr
entry:
  %0 = bitcast i8* %b to double*
  %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 1
  %1 = bitcast i8* %add.ptr1 to double*
  %2 = load double, double* %0, align 8
  %vecins = insertelement <2 x double> %a, double %2, i32 %idx1
  %3 = load double, double* %1, align 8
  %vecins2 = insertelement <2 x double> %vecins, double %3, i32 %idx2
  ret <2 x double> %vecins2
}
893
; testDouble3: variable-index insertion of two loaded doubles into %a.
; The first double is loaded from %b + 65536 bytes and inserted at %idx1;
; the second is loaded from %b + 68719476736 bytes (2^36) and inserted at
; %idx2 into the result of the first insertion.
; The assertion lines below are autogenerated (see the NOTE at the top of the
; file); regenerate them with the update script instead of editing by hand.
894define <2 x double> @testDouble3(<2 x double> %a, i8* %b, i32 zeroext %idx1, i32 zeroext %idx2) {
895; CHECK-LABEL: testDouble3:
896; CHECK:       # %bb.0: # %entry
897; CHECK-NEXT:    pld r3, 65536(r5), 0
898; CHECK-NEXT:    extsw r4, r6
899; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
900; CHECK-NEXT:    vinsdrx v2, r4, r3
901; CHECK-NEXT:    li r3, 1
902; CHECK-NEXT:    extsw r4, r7
903; CHECK-NEXT:    rldic r3, r3, 36, 27
904; CHECK-NEXT:    rlwinm r4, r4, 3, 0, 28
905; CHECK-NEXT:    ldx r3, r5, r3
906; CHECK-NEXT:    vinsdrx v2, r4, r3
907; CHECK-NEXT:    blr
908;
909; CHECK-BE-LABEL: testDouble3:
910; CHECK-BE:       # %bb.0: # %entry
911; CHECK-BE-NEXT:    pld r3, 65536(r5), 0
912; CHECK-BE-NEXT:    extsw r4, r6
913; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
914; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
915; CHECK-BE-NEXT:    li r3, 1
916; CHECK-BE-NEXT:    extsw r4, r7
917; CHECK-BE-NEXT:    rldic r3, r3, 36, 27
918; CHECK-BE-NEXT:    rlwinm r4, r4, 3, 0, 28
919; CHECK-BE-NEXT:    ldx r3, r5, r3
920; CHECK-BE-NEXT:    vinsdlx v2, r4, r3
921; CHECK-BE-NEXT:    blr
922;
923; CHECK-P9-LABEL: testDouble3:
924; CHECK-P9:       # %bb.0: # %entry
925; CHECK-P9-NEXT:    lis r3, 1
926; CHECK-P9-NEXT:    rlwinm r4, r6, 3, 28, 28
927; CHECK-P9-NEXT:    addi r6, r1, -32
928; CHECK-P9-NEXT:    ldx r3, r5, r3
929; CHECK-P9-NEXT:    stxv v2, -32(r1)
930; CHECK-P9-NEXT:    stdx r3, r6, r4
931; CHECK-P9-NEXT:    li r3, 1
932; CHECK-P9-NEXT:    rlwinm r4, r7, 3, 28, 28
933; CHECK-P9-NEXT:    lxv vs0, -32(r1)
934; CHECK-P9-NEXT:    rldic r3, r3, 36, 27
935; CHECK-P9-NEXT:    ldx r3, r5, r3
936; CHECK-P9-NEXT:    addi r5, r1, -16
937; CHECK-P9-NEXT:    stxv vs0, -16(r1)
938; CHECK-P9-NEXT:    stdx r3, r5, r4
939; CHECK-P9-NEXT:    lxv v2, -16(r1)
940; CHECK-P9-NEXT:    blr
941;
942; AIX-P8-64-LABEL: testDouble3:
943; AIX-P8-64:       # %bb.0: # %entry
944; AIX-P8-64-NEXT:    lis r6, 1
945; AIX-P8-64-NEXT:    addi r7, r1, -32
946; AIX-P8-64-NEXT:    rlwinm r4, r4, 3, 28, 28
947; AIX-P8-64-NEXT:    li r8, 1
948; AIX-P8-64-NEXT:    rlwinm r5, r5, 3, 28, 28
949; AIX-P8-64-NEXT:    ldx r6, r3, r6
950; AIX-P8-64-NEXT:    stxvd2x v2, 0, r7
951; AIX-P8-64-NEXT:    stdx r6, r7, r4
952; AIX-P8-64-NEXT:    rldic r4, r8, 36, 27
953; AIX-P8-64-NEXT:    lxvd2x vs0, 0, r7
954; AIX-P8-64-NEXT:    ldx r3, r3, r4
955; AIX-P8-64-NEXT:    addi r4, r1, -16
956; AIX-P8-64-NEXT:    stxvd2x vs0, 0, r4
957; AIX-P8-64-NEXT:    stdx r3, r4, r5
958; AIX-P8-64-NEXT:    lxvd2x v2, 0, r4
959; AIX-P8-64-NEXT:    blr
960;
961; AIX-P8-32-LABEL: testDouble3:
962; AIX-P8-32:       # %bb.0: # %entry
963; AIX-P8-32-NEXT:    lis r6, 1
964; AIX-P8-32-NEXT:    rlwinm r4, r4, 3, 28, 28
965; AIX-P8-32-NEXT:    lfdx f0, r3, r6
966; AIX-P8-32-NEXT:    addi r6, r1, -32
967; AIX-P8-32-NEXT:    stxvd2x v2, 0, r6
968; AIX-P8-32-NEXT:    stfdx f0, r6, r4
969; AIX-P8-32-NEXT:    addi r4, r1, -16
970; AIX-P8-32-NEXT:    lxvd2x vs0, 0, r6
971; AIX-P8-32-NEXT:    lfd f1, 0(r3)
972; AIX-P8-32-NEXT:    rlwinm r3, r5, 3, 28, 28
973; AIX-P8-32-NEXT:    stxvd2x vs0, 0, r4
974; AIX-P8-32-NEXT:    stfdx f1, r4, r3
975; AIX-P8-32-NEXT:    lxvd2x v2, 0, r4
976; AIX-P8-32-NEXT:    blr
977entry:
; First source address: %b + 65536 bytes.
978  %add.ptr = getelementptr inbounds i8, i8* %b, i64 65536
979  %0 = bitcast i8* %add.ptr to double*
; Second source address: %b + 68719476736 bytes (2^36). The 32-bit AIX
; expectations above load it with displacement 0 -- presumably the offset is
; truncated to the 32-bit pointer width; confirm before relying on this.
980  %add.ptr1 = getelementptr inbounds i8, i8* %b, i64 68719476736
981  %1 = bitcast i8* %add.ptr1 to double*
; Load and insert the first value at the run-time index %idx1.
982  %2 = load double, double* %0, align 8
983  %vecins = insertelement <2 x double> %a, double %2, i32 %idx1
; Load and insert the second value at %idx2 on top of the first insertion.
984  %3 = load double, double* %1, align 8
985  %vecins2 = insertelement <2 x double> %vecins, double %3, i32 %idx2
986  ret <2 x double> %vecins2
987}
988
989; Double immediate
990
; testDoubleImm1: inserts the scalar double argument %b into element 0 of %a
; using a constant (immediate) index; no memory access is involved in the IR.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
991define <2 x double> @testDoubleImm1(<2 x double> %a, double %b) {
992; CHECK-LABEL: testDoubleImm1:
993; CHECK:       # %bb.0: # %entry
994; CHECK-NEXT:    # kill: def $f1 killed $f1 def $vsl1
995; CHECK-NEXT:    xxmrghd v2, v2, vs1
996; CHECK-NEXT:    blr
997;
998; CHECK-BE-LABEL: testDoubleImm1:
999; CHECK-BE:       # %bb.0: # %entry
1000; CHECK-BE-NEXT:    # kill: def $f1 killed $f1 def $vsl1
1001; CHECK-BE-NEXT:    xxpermdi v2, vs1, v2, 1
1002; CHECK-BE-NEXT:    blr
1003;
1004; CHECK-P9-LABEL: testDoubleImm1:
1005; CHECK-P9:       # %bb.0: # %entry
1006; CHECK-P9-NEXT:    # kill: def $f1 killed $f1 def $vsl1
1007; CHECK-P9-NEXT:    xxpermdi v2, vs1, v2, 1
1008; CHECK-P9-NEXT:    blr
1009;
1010; AIX-P8-LABEL: testDoubleImm1:
1011; AIX-P8:       # %bb.0: # %entry
1012; AIX-P8-NEXT:    # kill: def $f1 killed $f1 def $vsl1
1013; AIX-P8-NEXT:    xxpermdi v2, vs1, v2, 1
1014; AIX-P8-NEXT:    blr
1015entry:
; Constant index 0: only element 0 of %a is replaced by %b.
1016  %vecins = insertelement <2 x double> %a, double %b, i32 0
1017  ret <2 x double> %vecins
1018}
1019
; testDoubleImm2: loads a double directly from %b (an i32* reinterpreted as
; double*, zero offset) and inserts it into element 0 of %a.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
1020define <2 x double> @testDoubleImm2(<2 x double> %a, i32* %b) {
1021; CHECK-LABEL: testDoubleImm2:
1022; CHECK:       # %bb.0: # %entry
1023; CHECK-NEXT:    lfd f0, 0(r5)
1024; CHECK-NEXT:    xxmrghd v2, v2, vs0
1025; CHECK-NEXT:    blr
1026;
1027; CHECK-BE-LABEL: testDoubleImm2:
1028; CHECK-BE:       # %bb.0: # %entry
1029; CHECK-BE-NEXT:    lfd f0, 0(r5)
1030; CHECK-BE-NEXT:    xxpermdi v2, vs0, v2, 1
1031; CHECK-BE-NEXT:    blr
1032;
1033; CHECK-P9-LABEL: testDoubleImm2:
1034; CHECK-P9:       # %bb.0: # %entry
1035; CHECK-P9-NEXT:    lfd f0, 0(r5)
1036; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
1037; CHECK-P9-NEXT:    blr
1038;
1039; AIX-P8-LABEL: testDoubleImm2:
1040; AIX-P8:       # %bb.0: # %entry
1041; AIX-P8-NEXT:    lfd f0, 0(r3)
1042; AIX-P8-NEXT:    xxpermdi v2, vs0, v2, 1
1043; AIX-P8-NEXT:    blr
1044entry:
; Reinterpret the i32 pointer as a double pointer; no offset is applied.
1045  %0 = bitcast i32* %b to double*
1046  %1 = load double, double* %0, align 8
; Constant index 0: the loaded value replaces element 0 of %a.
1047  %vecins = insertelement <2 x double> %a, double %1, i32 0
1048  ret <2 x double> %vecins
1049}
1050
; testDoubleImm3: loads a double from one i32 element past %b (the expected
; loads below use a displacement of 4) and inserts it into element 0 of %a.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
1051define <2 x double> @testDoubleImm3(<2 x double> %a, i32* %b) {
1052; CHECK-LABEL: testDoubleImm3:
1053; CHECK:       # %bb.0: # %entry
1054; CHECK-NEXT:    lfd f0, 4(r5)
1055; CHECK-NEXT:    xxmrghd v2, v2, vs0
1056; CHECK-NEXT:    blr
1057;
1058; CHECK-BE-LABEL: testDoubleImm3:
1059; CHECK-BE:       # %bb.0: # %entry
1060; CHECK-BE-NEXT:    lfd f0, 4(r5)
1061; CHECK-BE-NEXT:    xxpermdi v2, vs0, v2, 1
1062; CHECK-BE-NEXT:    blr
1063;
1064; CHECK-P9-LABEL: testDoubleImm3:
1065; CHECK-P9:       # %bb.0: # %entry
1066; CHECK-P9-NEXT:    lfd f0, 4(r5)
1067; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
1068; CHECK-P9-NEXT:    blr
1069;
1070; AIX-P8-LABEL: testDoubleImm3:
1071; AIX-P8:       # %bb.0: # %entry
1072; AIX-P8-NEXT:    lfd f0, 4(r3)
1073; AIX-P8-NEXT:    xxpermdi v2, vs0, v2, 1
1074; AIX-P8-NEXT:    blr
1075entry:
; Advance %b by one i32 element before reinterpreting it as a double pointer.
1076  %add.ptr = getelementptr inbounds i32, i32* %b, i64 1
1077  %0 = bitcast i32* %add.ptr to double*
1078  %1 = load double, double* %0, align 8
; Constant index 0: the loaded value replaces element 0 of %a.
1079  %vecins = insertelement <2 x double> %a, double %1, i32 0
1080  ret <2 x double> %vecins
1081}
1082
; testDoubleImm4: loads a double from 65536 i32 elements past %b (the pwr10
; expectations below use a displacement of 262144) and inserts it into
; element 0 of %a.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
1083define <2 x double> @testDoubleImm4(<2 x double> %a, i32* %b) {
1084; CHECK-LABEL: testDoubleImm4:
1085; CHECK:       # %bb.0: # %entry
1086; CHECK-NEXT:    plfd f0, 262144(r5), 0
1087; CHECK-NEXT:    xxmrghd v2, v2, vs0
1088; CHECK-NEXT:    blr
1089;
1090; CHECK-BE-LABEL: testDoubleImm4:
1091; CHECK-BE:       # %bb.0: # %entry
1092; CHECK-BE-NEXT:    plfd f0, 262144(r5), 0
1093; CHECK-BE-NEXT:    xxpermdi v2, vs0, v2, 1
1094; CHECK-BE-NEXT:    blr
1095;
1096; CHECK-P9-LABEL: testDoubleImm4:
1097; CHECK-P9:       # %bb.0: # %entry
1098; CHECK-P9-NEXT:    lis r3, 4
1099; CHECK-P9-NEXT:    lfdx f0, r5, r3
1100; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
1101; CHECK-P9-NEXT:    blr
1102;
1103; AIX-P8-LABEL: testDoubleImm4:
1104; AIX-P8:       # %bb.0: # %entry
1105; AIX-P8-NEXT:    lis r4, 4
1106; AIX-P8-NEXT:    lfdx f0, r3, r4
1107; AIX-P8-NEXT:    xxpermdi v2, vs0, v2, 1
1108; AIX-P8-NEXT:    blr
1109entry:
; Advance %b by 65536 i32 elements before reinterpreting it as a double pointer.
1110  %add.ptr = getelementptr inbounds i32, i32* %b, i64 65536
1111  %0 = bitcast i32* %add.ptr to double*
1112  %1 = load double, double* %0, align 8
; Constant index 0: the loaded value replaces element 0 of %a.
1113  %vecins = insertelement <2 x double> %a, double %1, i32 0
1114  ret <2 x double> %vecins
1115}
1116
; testDoubleImm5: loads a double from 68719476736 (2^36) i32 elements past %b
; and inserts it into element 0 of %a. The 64-bit expectations below build the
; offset in a register (li 1 followed by rldic with shift 38) and use an
; indexed load.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
1117define <2 x double> @testDoubleImm5(<2 x double> %a, i32* %b) {
1118; CHECK-LABEL: testDoubleImm5:
1119; CHECK:       # %bb.0: # %entry
1120; CHECK-NEXT:    li r3, 1
1121; CHECK-NEXT:    rldic r3, r3, 38, 25
1122; CHECK-NEXT:    lfdx f0, r5, r3
1123; CHECK-NEXT:    xxmrghd v2, v2, vs0
1124; CHECK-NEXT:    blr
1125;
1126; CHECK-BE-LABEL: testDoubleImm5:
1127; CHECK-BE:       # %bb.0: # %entry
1128; CHECK-BE-NEXT:    li r3, 1
1129; CHECK-BE-NEXT:    rldic r3, r3, 38, 25
1130; CHECK-BE-NEXT:    lfdx f0, r5, r3
1131; CHECK-BE-NEXT:    xxpermdi v2, vs0, v2, 1
1132; CHECK-BE-NEXT:    blr
1133;
1134; CHECK-P9-LABEL: testDoubleImm5:
1135; CHECK-P9:       # %bb.0: # %entry
1136; CHECK-P9-NEXT:    li r3, 1
1137; CHECK-P9-NEXT:    rldic r3, r3, 38, 25
1138; CHECK-P9-NEXT:    lfdx f0, r5, r3
1139; CHECK-P9-NEXT:    xxpermdi v2, vs0, v2, 1
1140; CHECK-P9-NEXT:    blr
1141;
1142; AIX-P8-64-LABEL: testDoubleImm5:
1143; AIX-P8-64:       # %bb.0: # %entry
1144; AIX-P8-64-NEXT:    li r4, 1
1145; AIX-P8-64-NEXT:    rldic r4, r4, 38, 25
1146; AIX-P8-64-NEXT:    lfdx f0, r3, r4
1147; AIX-P8-64-NEXT:    xxpermdi v2, vs0, v2, 1
1148; AIX-P8-64-NEXT:    blr
1149;
1150; AIX-P8-32-LABEL: testDoubleImm5:
1151; AIX-P8-32:       # %bb.0: # %entry
1152; AIX-P8-32-NEXT:    lfd f0, 0(r3)
1153; AIX-P8-32-NEXT:    xxpermdi v2, vs0, v2, 1
1154; AIX-P8-32-NEXT:    blr
1155entry:
; Advance %b by 2^36 i32 elements. The 32-bit AIX expectations above load with
; displacement 0 -- presumably the byte offset is truncated to the 32-bit
; pointer width; confirm before relying on this.
1156  %add.ptr = getelementptr inbounds i32, i32* %b, i64 68719476736
1157  %0 = bitcast i32* %add.ptr to double*
1158  %1 = load double, double* %0, align 8
; Constant index 0: the loaded value replaces element 0 of %a.
1159  %vecins = insertelement <2 x double> %a, double %1, i32 0
1160  ret <2 x double> %vecins
1161}
1162
; testInsertDoubleToFloat: truncates the double argument %b to float and
; inserts the result into element 1 of the <4 x float> vector %a.
; The function references attribute group #0, which is defined outside this
; part of the file.
; The assertion lines below are autogenerated; regenerate rather than hand-edit.
1163define dso_local <4 x float> @testInsertDoubleToFloat(<4 x float> %a, double %b) local_unnamed_addr #0 {
1164; CHECK-LABEL: testInsertDoubleToFloat:
1165; CHECK:       # %bb.0: # %entry
1166; CHECK-NEXT:    xscvdpsp f0, f1
1167; CHECK-NEXT:    xxinsertw v2, vs0, 8
1168; CHECK-NEXT:    blr
1169;
1170; CHECK-BE-LABEL: testInsertDoubleToFloat:
1171; CHECK-BE:       # %bb.0: # %entry
1172; CHECK-BE-NEXT:    xscvdpsp f0, f1
1173; CHECK-BE-NEXT:    xxinsertw v2, vs0, 4
1174; CHECK-BE-NEXT:    blr
1175;
1176; CHECK-P9-LABEL: testInsertDoubleToFloat:
1177; CHECK-P9:       # %bb.0: # %entry
1178; CHECK-P9-NEXT:    xscvdpsp f0, f1
1179; CHECK-P9-NEXT:    xxinsertw v2, vs0, 4
1180; CHECK-P9-NEXT:    blr
1181;
1182; AIX-P8-64-LABEL: testInsertDoubleToFloat:
1183; AIX-P8-64:       # %bb.0: # %entry
1184; AIX-P8-64-NEXT:    xsrsp f0, f1
1185; AIX-P8-64-NEXT:    ld r3, L..C8(r2) # %const.0
1186; AIX-P8-64-NEXT:    lxvw4x v4, 0, r3
1187; AIX-P8-64-NEXT:    xscvdpspn v3, f0
1188; AIX-P8-64-NEXT:    vperm v2, v2, v3, v4
1189; AIX-P8-64-NEXT:    blr
1190;
1191; AIX-P8-32-LABEL: testInsertDoubleToFloat:
1192; AIX-P8-32:       # %bb.0: # %entry
1193; AIX-P8-32-NEXT:    xsrsp f0, f1
1194; AIX-P8-32-NEXT:    lwz r3, L..C12(r2) # %const.0
1195; AIX-P8-32-NEXT:    lxvw4x v4, 0, r3
1196; AIX-P8-32-NEXT:    xscvdpspn v3, f0
1197; AIX-P8-32-NEXT:    vperm v2, v2, v3, v4
1198; AIX-P8-32-NEXT:    blr
1199entry:
; Narrow the double to single precision before the insertion.
1200  %conv = fptrunc double %b to float
; Constant index 1: only element 1 of %a is replaced.
1201  %vecins = insertelement <4 x float> %a, float %conv, i32 1
1202  ret <4 x float> %vecins
1203}
1204