1; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
3; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9LE
4; RUN: llc -mcpu=pwr9 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
5; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P9BE
6; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
7; RUN:    -mtriple=powerpc64le-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8LE
8; RUN: llc -mcpu=pwr8 -verify-machineinstrs -ppc-vsr-nums-as-vr -ppc-asm-full-reg-names \
9; RUN:    -mtriple=powerpc64-unknown-linux-gnu < %s | FileCheck %s --check-prefix=P8BE
10
11; Function Attrs: norecurse nounwind readonly
; s2v_test1: loads one i32 from %int32 and inserts it into element 0 of %vec.
; Lowering expected by the assertions below, per RUN configuration:
;   - pwr9 (both endians): lwz + mtfprwz + xxinsertw; the xxinsertw immediate
;     is 12 on little-endian and 0 on big-endian.
;   - pwr8 little-endian: lxsiwzx for the scalar, then a vperm whose control
;     vector is fetched with lvx from the TOC-relative symbol .LCPI0_0.
;   - pwr8 big-endian: lxsiwzx, then vmrghw and two xxsldwi (no constant pool).
12define <4 x i32> @s2v_test1(i32* nocapture readonly %int32, <4 x i32> %vec)  {
13; P9LE-LABEL: s2v_test1:
14; P9LE:       # %bb.0: # %entry
15; P9LE-NEXT:    lwz r3, 0(r3)
16; P9LE-NEXT:    mtfprwz f0, r3
17; P9LE-NEXT:    xxinsertw v2, vs0, 12
18; P9LE-NEXT:    blr
19;
20; P9BE-LABEL: s2v_test1:
21; P9BE:       # %bb.0: # %entry
22; P9BE-NEXT:    lwz r3, 0(r3)
23; P9BE-NEXT:    mtfprwz f0, r3
24; P9BE-NEXT:    xxinsertw v2, vs0, 0
25; P9BE-NEXT:    blr
26;
27; P8LE-LABEL: s2v_test1:
28; P8LE:       # %bb.0: # %entry
29; P8LE-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
30; P8LE-NEXT:    lxsiwzx v4, 0, r3
31; P8LE-NEXT:    addi r4, r4, .LCPI0_0@toc@l
32; P8LE-NEXT:    lvx v3, 0, r4
33; P8LE-NEXT:    vperm v2, v2, v4, v3
34; P8LE-NEXT:    blr
35;
36; P8BE-LABEL: s2v_test1:
37; P8BE:       # %bb.0: # %entry
38; P8BE-NEXT:    lxsiwzx v3, 0, r3
39; P8BE-NEXT:    vmrghw v4, v2, v3
40; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
41; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
42; P8BE-NEXT:    blr
43entry:
44  %0 = load i32, i32* %int32, align 4
45  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
46  ret <4 x i32> %vecins
47}
48
49; Function Attrs: norecurse nounwind readonly
; s2v_test2: loads the i32 at element index 1 of %int32 (constant
; getelementptr) and inserts it into element 0 of %vec.
; Lowering expected by the assertions below:
;   - pwr9: the constant offset is folded into the load as lwz 4(r3), followed
;     by mtfprwz + xxinsertw (immediate 12 little-endian, 0 big-endian).
;   - pwr8: the address is first adjusted with addi r3, r3, 4 because the
;     scalar is loaded with the indexed-form lxsiwzx; little-endian then uses
;     a vperm with a control vector from .LCPI1_0, big-endian uses vmrghw and
;     two xxsldwi.
50define <4 x i32> @s2v_test2(i32* nocapture readonly %int32, <4 x i32> %vec)  {
51; P9LE-LABEL: s2v_test2:
52; P9LE:       # %bb.0: # %entry
53; P9LE-NEXT:    lwz r3, 4(r3)
54; P9LE-NEXT:    mtfprwz f0, r3
55; P9LE-NEXT:    xxinsertw v2, vs0, 12
56; P9LE-NEXT:    blr
57;
58; P9BE-LABEL: s2v_test2:
59; P9BE:       # %bb.0: # %entry
60; P9BE-NEXT:    lwz r3, 4(r3)
61; P9BE-NEXT:    mtfprwz f0, r3
62; P9BE-NEXT:    xxinsertw v2, vs0, 0
63; P9BE-NEXT:    blr
64;
65; P8LE-LABEL: s2v_test2:
66; P8LE:       # %bb.0: # %entry
67; P8LE-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
68; P8LE-NEXT:    addi r3, r3, 4
69; P8LE-NEXT:    addi r4, r4, .LCPI1_0@toc@l
70; P8LE-NEXT:    lxsiwzx v4, 0, r3
71; P8LE-NEXT:    lvx v3, 0, r4
72; P8LE-NEXT:    vperm v2, v2, v4, v3
73; P8LE-NEXT:    blr
74;
75; P8BE-LABEL: s2v_test2:
76; P8BE:       # %bb.0: # %entry
77; P8BE-NEXT:    addi r3, r3, 4
78; P8BE-NEXT:    lxsiwzx v3, 0, r3
79; P8BE-NEXT:    vmrghw v4, v2, v3
80; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
81; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
82; P8BE-NEXT:    blr
83entry:
84  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
85  %0 = load i32, i32* %arrayidx, align 4
86  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
87  ret <4 x i32> %vecins
88}
89
90; Function Attrs: norecurse nounwind readonly
; s2v_test3: loads the i32 at a run-time index (%Idx, sign-extended to i64)
; of %int32 and inserts it into element 0 of %vec.
; Lowering expected by the assertions below:
;   - All configurations scale the index with sldi ..., r7, 2 (the index is
;     expected in r7) and use an indexed load.
;   - pwr9: lwzx + mtfprwz + xxinsertw (immediate 12 little-endian, 0
;     big-endian).
;   - pwr8 little-endian: lxsiwzx plus a vperm with a control vector from
;     .LCPI2_0; pwr8 big-endian: lxsiwzx, vmrghw and two xxsldwi.
91define <4 x i32> @s2v_test3(i32* nocapture readonly %int32, <4 x i32> %vec, i32 signext %Idx)  {
92; P9LE-LABEL: s2v_test3:
93; P9LE:       # %bb.0: # %entry
94; P9LE-NEXT:    sldi r4, r7, 2
95; P9LE-NEXT:    lwzx r3, r3, r4
96; P9LE-NEXT:    mtfprwz f0, r3
97; P9LE-NEXT:    xxinsertw v2, vs0, 12
98; P9LE-NEXT:    blr
99;
100; P9BE-LABEL: s2v_test3:
101; P9BE:       # %bb.0: # %entry
102; P9BE-NEXT:    sldi r4, r7, 2
103; P9BE-NEXT:    lwzx r3, r3, r4
104; P9BE-NEXT:    mtfprwz f0, r3
105; P9BE-NEXT:    xxinsertw v2, vs0, 0
106; P9BE-NEXT:    blr
107;
108; P8LE-LABEL: s2v_test3:
109; P8LE:       # %bb.0: # %entry
110; P8LE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
111; P8LE-NEXT:    sldi r5, r7, 2
112; P8LE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
113; P8LE-NEXT:    lxsiwzx v3, r3, r5
114; P8LE-NEXT:    lvx v4, 0, r4
115; P8LE-NEXT:    vperm v2, v2, v3, v4
116; P8LE-NEXT:    blr
117;
118; P8BE-LABEL: s2v_test3:
119; P8BE:       # %bb.0: # %entry
120; P8BE-NEXT:    sldi r4, r7, 2
121; P8BE-NEXT:    lxsiwzx v3, r3, r4
122; P8BE-NEXT:    vmrghw v4, v2, v3
123; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
124; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
125; P8BE-NEXT:    blr
126entry:
127  %idxprom = sext i32 %Idx to i64
128  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 %idxprom
129  %0 = load i32, i32* %arrayidx, align 4
130  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
131  ret <4 x i32> %vecins
132}
133
134; Function Attrs: norecurse nounwind readonly
; s2v_test4: loads the i32 at element index 1 of %int32 and inserts it into
; element 0 of %vec.
; NOTE(review): the IR body and signature are the same as s2v_test2 in this
; file, and the expected code differs only in the constant-pool label
; (.LCPI3_0). Confirm whether a distinct case was intended here.
; Lowering expected by the assertions below:
;   - pwr9: lwz 4(r3) + mtfprwz + xxinsertw (immediate 12 little-endian, 0
;     big-endian).
;   - pwr8: addi r3, r3, 4 then lxsiwzx; little-endian finishes with a vperm
;     using a control vector from .LCPI3_0, big-endian with vmrghw and two
;     xxsldwi.
135define <4 x i32> @s2v_test4(i32* nocapture readonly %int32, <4 x i32> %vec)  {
136; P9LE-LABEL: s2v_test4:
137; P9LE:       # %bb.0: # %entry
138; P9LE-NEXT:    lwz r3, 4(r3)
139; P9LE-NEXT:    mtfprwz f0, r3
140; P9LE-NEXT:    xxinsertw v2, vs0, 12
141; P9LE-NEXT:    blr
142;
143; P9BE-LABEL: s2v_test4:
144; P9BE:       # %bb.0: # %entry
145; P9BE-NEXT:    lwz r3, 4(r3)
146; P9BE-NEXT:    mtfprwz f0, r3
147; P9BE-NEXT:    xxinsertw v2, vs0, 0
148; P9BE-NEXT:    blr
149;
150; P8LE-LABEL: s2v_test4:
151; P8LE:       # %bb.0: # %entry
152; P8LE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
153; P8LE-NEXT:    addi r3, r3, 4
154; P8LE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
155; P8LE-NEXT:    lxsiwzx v4, 0, r3
156; P8LE-NEXT:    lvx v3, 0, r4
157; P8LE-NEXT:    vperm v2, v2, v4, v3
158; P8LE-NEXT:    blr
159;
160; P8BE-LABEL: s2v_test4:
161; P8BE:       # %bb.0: # %entry
162; P8BE-NEXT:    addi r3, r3, 4
163; P8BE-NEXT:    lxsiwzx v3, 0, r3
164; P8BE-NEXT:    vmrghw v4, v2, v3
165; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
166; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
167; P8BE-NEXT:    blr
168entry:
169  %arrayidx = getelementptr inbounds i32, i32* %int32, i64 1
170  %0 = load i32, i32* %arrayidx, align 4
171  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
172  ret <4 x i32> %vecins
173}
174
175; Function Attrs: norecurse nounwind readonly
; s2v_test5: same operation as a plain load + insert into element 0, but with
; the parameters in the opposite order: the vector %vec comes first and the
; pointer %ptr1 second. The assertions below expect the pointer in r5.
; Lowering expected by the assertions below:
;   - pwr9: lwz 0(r5) + mtfprwz + xxinsertw (immediate 12 little-endian, 0
;     big-endian).
;   - pwr8 little-endian: lxsiwzx from r5 plus a vperm with a control vector
;     from .LCPI4_0; pwr8 big-endian: lxsiwzx, vmrghw and two xxsldwi.
176define <4 x i32> @s2v_test5(<4 x i32> %vec, i32* nocapture readonly %ptr1)  {
177; P9LE-LABEL: s2v_test5:
178; P9LE:       # %bb.0: # %entry
179; P9LE-NEXT:    lwz r3, 0(r5)
180; P9LE-NEXT:    mtfprwz f0, r3
181; P9LE-NEXT:    xxinsertw v2, vs0, 12
182; P9LE-NEXT:    blr
183;
184; P9BE-LABEL: s2v_test5:
185; P9BE:       # %bb.0: # %entry
186; P9BE-NEXT:    lwz r3, 0(r5)
187; P9BE-NEXT:    mtfprwz f0, r3
188; P9BE-NEXT:    xxinsertw v2, vs0, 0
189; P9BE-NEXT:    blr
190;
191; P8LE-LABEL: s2v_test5:
192; P8LE:       # %bb.0: # %entry
193; P8LE-NEXT:    addis r3, r2, .LCPI4_0@toc@ha
194; P8LE-NEXT:    lxsiwzx v4, 0, r5
195; P8LE-NEXT:    addi r3, r3, .LCPI4_0@toc@l
196; P8LE-NEXT:    lvx v3, 0, r3
197; P8LE-NEXT:    vperm v2, v2, v4, v3
198; P8LE-NEXT:    blr
199;
200; P8BE-LABEL: s2v_test5:
201; P8BE:       # %bb.0: # %entry
202; P8BE-NEXT:    lxsiwzx v3, 0, r5
203; P8BE-NEXT:    vmrghw v4, v2, v3
204; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
205; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
206; P8BE-NEXT:    blr
207entry:
208  %0 = load i32, i32* %ptr1, align 4
209  %vecins = insertelement <4 x i32> %vec, i32 %0, i32 0
210  ret <4 x i32> %vecins
211}
212
213; Function Attrs: norecurse nounwind readonly
; s2v_test_f1: loads one float and inserts it into element 0 of a
; <4 x float> vector. Despite its name, the pointer parameter %f64 has type
; float*, and the load is a 4-byte-aligned float load.
; Lowering expected by the assertions below:
;   - pwr9: lfs + xscvdpspn + xxinsertw (immediate 12 little-endian, 0
;     big-endian).
;   - pwr8 little-endian: lxsiwzx plus a vperm with a control vector from
;     .LCPI5_0; pwr8 big-endian: lxsiwzx, vmrghw and two xxsldwi. These are
;     the same pwr8 sequences the i32 tests in this file expect.
214define <4 x float> @s2v_test_f1(float* nocapture readonly %f64, <4 x float> %vec)  {
215; P9LE-LABEL: s2v_test_f1:
216; P9LE:       # %bb.0: # %entry
217; P9LE-NEXT:    lfs f0, 0(r3)
218; P9LE-NEXT:    xscvdpspn vs0, f0
219; P9LE-NEXT:    xxinsertw v2, vs0, 12
220; P9LE-NEXT:    blr
221;
222; P9BE-LABEL: s2v_test_f1:
223; P9BE:       # %bb.0: # %entry
224; P9BE-NEXT:    lfs f0, 0(r3)
225; P9BE-NEXT:    xscvdpspn vs0, f0
226; P9BE-NEXT:    xxinsertw v2, vs0, 0
227; P9BE-NEXT:    blr
228;
229; P8LE-LABEL: s2v_test_f1:
230; P8LE:       # %bb.0: # %entry
231; P8LE-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
232; P8LE-NEXT:    lxsiwzx v4, 0, r3
233; P8LE-NEXT:    addi r4, r4, .LCPI5_0@toc@l
234; P8LE-NEXT:    lvx v3, 0, r4
235; P8LE-NEXT:    vperm v2, v2, v4, v3
236; P8LE-NEXT:    blr
237;
238; P8BE-LABEL: s2v_test_f1:
239; P8BE:       # %bb.0: # %entry
240; P8BE-NEXT:    lxsiwzx v3, 0, r3
241; P8BE-NEXT:    vmrghw v4, v2, v3
242; P8BE-NEXT:    xxsldwi vs0, v2, v3, 1
243; P8BE-NEXT:    xxsldwi v2, v4, vs0, 3
244; P8BE-NEXT:    blr
245entry:
246  %0 = load float, float* %f64, align 4
247  %vecins = insertelement <4 x float> %vec, float %0, i32 0
248  ret <4 x float> %vecins
249}
250
251; Function Attrs: norecurse nounwind readonly
; s2v_test_f2: loads the float at element index 1 of %f64 (a float* despite
; its name) and inserts it into element 0 of a <2 x float> vector. The load
; is annotated align 8 in the IR.
; Lowering expected by the assertions below (pwr8 and pwr9 use the same
; instructions; only their order differs on little-endian):
;   - little-endian: addi r3, r3, 4 and lxsiwzx for the scalar, vmrglw of
;     %vec with itself, then vmrghw to combine.
;   - big-endian: addi r3, r3, 4 and lxsiwzx, then a single vmrgow.
252define <2 x float> @s2v_test_f2(float* nocapture readonly %f64, <2 x float> %vec)  {
253; P9LE-LABEL: s2v_test_f2:
254; P9LE:       # %bb.0: # %entry
255; P9LE-NEXT:    addi r3, r3, 4
256; P9LE-NEXT:    vmrglw v2, v2, v2
257; P9LE-NEXT:    lxsiwzx v3, 0, r3
258; P9LE-NEXT:    vmrghw v2, v2, v3
259; P9LE-NEXT:    blr
260;
261; P9BE-LABEL: s2v_test_f2:
262; P9BE:       # %bb.0: # %entry
263; P9BE-NEXT:    addi r3, r3, 4
264; P9BE-NEXT:    lxsiwzx v3, 0, r3
265; P9BE-NEXT:    vmrgow v2, v3, v2
266; P9BE-NEXT:    blr
267;
268; P8LE-LABEL: s2v_test_f2:
269; P8LE:       # %bb.0: # %entry
270; P8LE-NEXT:    vmrglw v2, v2, v2
271; P8LE-NEXT:    addi r3, r3, 4
272; P8LE-NEXT:    lxsiwzx v3, 0, r3
273; P8LE-NEXT:    vmrghw v2, v2, v3
274; P8LE-NEXT:    blr
275;
276; P8BE-LABEL: s2v_test_f2:
277; P8BE:       # %bb.0: # %entry
278; P8BE-NEXT:    addi r3, r3, 4
279; P8BE-NEXT:    lxsiwzx v3, 0, r3
280; P8BE-NEXT:    vmrgow v2, v3, v2
281; P8BE-NEXT:    blr
282entry:
283  %arrayidx = getelementptr inbounds float, float* %f64, i64 1
284  %0 = load float, float* %arrayidx, align 8
285  %vecins = insertelement <2 x float> %vec, float %0, i32 0
286  ret <2 x float> %vecins
287}
288
289; Function Attrs: norecurse nounwind readonly
; s2v_test_f3: loads the float at a run-time index (%Idx, sign-extended to
; i64) of %f64 (a float* despite its name) and inserts it into element 0 of
; a <2 x float> vector. The load is annotated align 8 in the IR.
; Lowering expected by the assertions below (pwr8 and pwr9 use the same
; instructions; only their order differs on little-endian):
;   - All configurations scale the index with sldi r4, r7, 2 and load the
;     scalar with the indexed lxsiwzx v3, r3, r4.
;   - little-endian: vmrglw of %vec with itself, then vmrghw to combine.
;   - big-endian: a single vmrgow.
290define <2 x float> @s2v_test_f3(float* nocapture readonly %f64, <2 x float> %vec, i32 signext %Idx)  {
291; P9LE-LABEL: s2v_test_f3:
292; P9LE:       # %bb.0: # %entry
293; P9LE-NEXT:    sldi r4, r7, 2
294; P9LE-NEXT:    vmrglw v2, v2, v2
295; P9LE-NEXT:    lxsiwzx v3, r3, r4
296; P9LE-NEXT:    vmrghw v2, v2, v3
297; P9LE-NEXT:    blr
298;
299; P9BE-LABEL: s2v_test_f3:
300; P9BE:       # %bb.0: # %entry
301; P9BE-NEXT:    sldi r4, r7, 2
302; P9BE-NEXT:    lxsiwzx v3, r3, r4
303; P9BE-NEXT:    vmrgow v2, v3, v2
304; P9BE-NEXT:    blr
305;
306; P8LE-LABEL: s2v_test_f3:
307; P8LE:       # %bb.0: # %entry
308; P8LE-NEXT:    vmrglw v2, v2, v2
309; P8LE-NEXT:    sldi r4, r7, 2
310; P8LE-NEXT:    lxsiwzx v3, r3, r4
311; P8LE-NEXT:    vmrghw v2, v2, v3
312; P8LE-NEXT:    blr
313;
314; P8BE-LABEL: s2v_test_f3:
315; P8BE:       # %bb.0: # %entry
316; P8BE-NEXT:    sldi r4, r7, 2
317; P8BE-NEXT:    lxsiwzx v3, r3, r4
318; P8BE-NEXT:    vmrgow v2, v3, v2
319; P8BE-NEXT:    blr
320entry:
321  %idxprom = sext i32 %Idx to i64
322  %arrayidx = getelementptr inbounds float, float* %f64, i64 %idxprom
323  %0 = load float, float* %arrayidx, align 8
324  %vecins = insertelement <2 x float> %vec, float %0, i32 0
325  ret <2 x float> %vecins
326}
327
328; Function Attrs: norecurse nounwind readonly
; s2v_test_f4: loads the float at element index 1 of %f64 (a float* despite
; its name) and inserts it into element 0 of a <2 x float> vector. The load
; is annotated align 8 in the IR.
; NOTE(review): the IR body, signature and expected code are the same as
; s2v_test_f2 in this file. Confirm whether a distinct case was intended.
; Lowering expected by the assertions below:
;   - little-endian: addi r3, r3, 4 and lxsiwzx for the scalar, vmrglw of
;     %vec with itself, then vmrghw to combine.
;   - big-endian: addi r3, r3, 4 and lxsiwzx, then a single vmrgow.
329define <2 x float> @s2v_test_f4(float* nocapture readonly %f64, <2 x float> %vec)  {
330; P9LE-LABEL: s2v_test_f4:
331; P9LE:       # %bb.0: # %entry
332; P9LE-NEXT:    addi r3, r3, 4
333; P9LE-NEXT:    vmrglw v2, v2, v2
334; P9LE-NEXT:    lxsiwzx v3, 0, r3
335; P9LE-NEXT:    vmrghw v2, v2, v3
336; P9LE-NEXT:    blr
337;
338; P9BE-LABEL: s2v_test_f4:
339; P9BE:       # %bb.0: # %entry
340; P9BE-NEXT:    addi r3, r3, 4
341; P9BE-NEXT:    lxsiwzx v3, 0, r3
342; P9BE-NEXT:    vmrgow v2, v3, v2
343; P9BE-NEXT:    blr
344;
345; P8LE-LABEL: s2v_test_f4:
346; P8LE:       # %bb.0: # %entry
347; P8LE-NEXT:    vmrglw v2, v2, v2
348; P8LE-NEXT:    addi r3, r3, 4
349; P8LE-NEXT:    lxsiwzx v3, 0, r3
350; P8LE-NEXT:    vmrghw v2, v2, v3
351; P8LE-NEXT:    blr
352;
353; P8BE-LABEL: s2v_test_f4:
354; P8BE:       # %bb.0: # %entry
355; P8BE-NEXT:    addi r3, r3, 4
356; P8BE-NEXT:    lxsiwzx v3, 0, r3
357; P8BE-NEXT:    vmrgow v2, v3, v2
358; P8BE-NEXT:    blr
359entry:
360  %arrayidx = getelementptr inbounds float, float* %f64, i64 1
361  %0 = load float, float* %arrayidx, align 8
362  %vecins = insertelement <2 x float> %vec, float %0, i32 0
363  ret <2 x float> %vecins
364}
365
366; Function Attrs: norecurse nounwind readonly
; s2v_test_f5: loads one float from %ptr1 and inserts it into element 0 of a
; <2 x float> vector, with the vector as the first parameter and the pointer
; as the second. The assertions below expect the pointer in r5. The load is
; annotated align 8 in the IR.
; Lowering expected by the assertions below (pwr8 and pwr9 use the same
; instructions; only their order differs on little-endian):
;   - little-endian: lxsiwzx from r5, vmrglw of %vec with itself, then vmrghw
;     to combine.
;   - big-endian: lxsiwzx from r5, then a single vmrgow.
367define <2 x float> @s2v_test_f5(<2 x float> %vec, float* nocapture readonly %ptr1)  {
368; P9LE-LABEL: s2v_test_f5:
369; P9LE:       # %bb.0: # %entry
370; P9LE-NEXT:    lxsiwzx v3, 0, r5
371; P9LE-NEXT:    vmrglw v2, v2, v2
372; P9LE-NEXT:    vmrghw v2, v2, v3
373; P9LE-NEXT:    blr
374;
375; P9BE-LABEL: s2v_test_f5:
376; P9BE:       # %bb.0: # %entry
377; P9BE-NEXT:    lxsiwzx v3, 0, r5
378; P9BE-NEXT:    vmrgow v2, v3, v2
379; P9BE-NEXT:    blr
380;
381; P8LE-LABEL: s2v_test_f5:
382; P8LE:       # %bb.0: # %entry
383; P8LE-NEXT:    vmrglw v2, v2, v2
384; P8LE-NEXT:    lxsiwzx v3, 0, r5
385; P8LE-NEXT:    vmrghw v2, v2, v3
386; P8LE-NEXT:    blr
387;
388; P8BE-LABEL: s2v_test_f5:
389; P8BE:       # %bb.0: # %entry
390; P8BE-NEXT:    lxsiwzx v3, 0, r5
391; P8BE-NEXT:    vmrgow v2, v3, v2
392; P8BE-NEXT:    blr
393entry:
394  %0 = load float, float* %ptr1, align 8
395  %vecins = insertelement <2 x float> %vec, float %0, i32 0
396  ret <2 x float> %vecins
397}
398
399