; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:     -mcpu=pwr8 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P8
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-linux-gnu \
; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-P9
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-unknown-linux-gnu \
; RUN:     -mcpu=pwr9 -ppc-asm-full-reg-names -ppc-vsr-nums-as-vr < %s | \
; RUN: FileCheck %s --check-prefix=CHECK-BE

; test8: load an <8 x i16> vector from %SrcPtr, convert every lane to double
; with uitofp (unsigned conversion), and store the <8 x double> result to %Sink.
;
; Expected lowering, as pinned by the assertions below:
;   - pwr8 little-endian: four vperm operations combine the source with a
;     register zeroed by xxlxor, using masks loaded from the constant pool;
;     each result feeds xvcvuxddp. Memory accesses use lxvd2x/stxvd2x paired
;     with xxswapd.
;   - pwr9 little-endian and pwr9 big-endian: the same vperm + xvcvuxddp
;     pattern, with lxv/stxv for the memory accesses and no xxswapd.
define void @test8(<8 x double>* nocapture %Sink, <8 x i16>* nocapture readonly %SrcPtr) {
; CHECK-P8-LABEL: test8:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    addis r5, r2, .LCPI0_0@toc@ha
; CHECK-P8-NEXT:    addis r6, r2, .LCPI0_2@toc@ha
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    addis r4, r2, .LCPI0_3@toc@ha
; CHECK-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-P8-NEXT:    addi r5, r5, .LCPI0_0@toc@l
; CHECK-P8-NEXT:    addi r4, r4, .LCPI0_3@toc@l
; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
; CHECK-P8-NEXT:    addi r5, r6, .LCPI0_2@toc@l
; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
; CHECK-P8-NEXT:    li r4, 48
; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
; CHECK-P8-NEXT:    addis r5, r2, .LCPI0_1@toc@ha
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    addi r5, r5, .LCPI0_1@toc@l
; CHECK-P8-NEXT:    lxvd2x vs4, 0, r5
; CHECK-P8-NEXT:    xxswapd v0, vs3
; CHECK-P8-NEXT:    xxswapd v3, vs1
; CHECK-P8-NEXT:    li r5, 32
; CHECK-P8-NEXT:    xxswapd v5, vs2
; CHECK-P8-NEXT:    xxswapd v1, vs4
; CHECK-P8-NEXT:    vperm v0, v4, v2, v0
; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
; CHECK-P8-NEXT:    vperm v5, v4, v2, v5
; CHECK-P8-NEXT:    vperm v2, v4, v2, v1
; CHECK-P8-NEXT:    xvcvuxddp vs2, v0
; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
; CHECK-P8-NEXT:    xvcvuxddp vs1, v5
; CHECK-P8-NEXT:    xvcvuxddp vs3, v2
; CHECK-P8-NEXT:    xxswapd vs2, vs2
; CHECK-P8-NEXT:    xxswapd vs0, vs0
; CHECK-P8-NEXT:    xxswapd vs1, vs1
; CHECK-P8-NEXT:    xxswapd vs3, vs3
; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
; CHECK-P8-NEXT:    li r4, 16
; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: test8:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lxv v2, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
; CHECK-P9-NEXT:    xxlxor v4, v4, v4
; CHECK-P9-NEXT:    addi r4, r4, .LCPI0_0@toc@l
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI0_1@toc@ha
; CHECK-P9-NEXT:    addi r4, r4, .LCPI0_1@toc@l
; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
; CHECK-P9-NEXT:    xvcvuxddp vs0, v3
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI0_2@toc@ha
; CHECK-P9-NEXT:    addi r4, r4, .LCPI0_2@toc@l
; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
; CHECK-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-P9-NEXT:    xvcvuxddp vs1, v3
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI0_3@toc@ha
; CHECK-P9-NEXT:    addi r4, r4, .LCPI0_3@toc@l
; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
; CHECK-P9-NEXT:    stxv vs1, 16(r3)
; CHECK-P9-NEXT:    xvcvuxddp vs2, v3
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
; CHECK-P9-NEXT:    stxv vs2, 32(r3)
; CHECK-P9-NEXT:    xvcvuxddp vs3, v2
; CHECK-P9-NEXT:    stxv vs3, 48(r3)
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: test8:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv v2, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_0@toc@ha
; CHECK-BE-NEXT:    xxlxor v4, v4, v4
; CHECK-BE-NEXT:    addi r4, r4, .LCPI0_0@toc@l
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_1@toc@ha
; CHECK-BE-NEXT:    addi r4, r4, .LCPI0_1@toc@l
; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_2@toc@ha
; CHECK-BE-NEXT:    addi r4, r4, .LCPI0_2@toc@l
; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    xvcvuxddp vs1, v3
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI0_3@toc@ha
; CHECK-BE-NEXT:    addi r4, r4, .LCPI0_3@toc@l
; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    xvcvuxddp vs2, v3
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    xvcvuxddp vs3, v2
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = load <8 x i16>, <8 x i16>* %SrcPtr, align 16
  %1 = uitofp <8 x i16> %0 to <8 x double>
  store <8 x double> %1, <8 x double>* %Sink, align 16
  ret void
}

; test4: load a <4 x i16> vector from %SrcPtr, convert every lane to double
; with uitofp (unsigned conversion), and store the <4 x double> result to %Sink.
;
; Expected lowering, as pinned by the assertions below:
;   - pwr8 little-endian: two vperm operations combine the source with a
;     register zeroed by xxlxor, using masks loaded from the constant pool;
;     each result feeds xvcvuxddp. Memory accesses use lxvd2x/stxvd2x paired
;     with xxswapd.
;   - pwr9 little-endian and pwr9 big-endian: the same vperm + xvcvuxddp
;     pattern, with lxv/stxv for the memory accesses and no xxswapd.
define void @test4(<4 x double>* nocapture %Sink, <4 x i16>* nocapture readonly %SrcPtr) {
; CHECK-P8-LABEL: test4:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    addis r5, r2, .LCPI1_0@toc@ha
; CHECK-P8-NEXT:    addis r6, r2, .LCPI1_1@toc@ha
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-P8-NEXT:    addi r5, r5, .LCPI1_0@toc@l
; CHECK-P8-NEXT:    addi r4, r6, .LCPI1_1@toc@l
; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
; CHECK-P8-NEXT:    li r4, 16
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    xxswapd v3, vs1
; CHECK-P8-NEXT:    xxswapd v5, vs2
; CHECK-P8-NEXT:    vperm v3, v4, v2, v3
; CHECK-P8-NEXT:    vperm v2, v4, v2, v5
; CHECK-P8-NEXT:    xvcvuxddp vs0, v3
; CHECK-P8-NEXT:    xvcvuxddp vs1, v2
; CHECK-P8-NEXT:    xxswapd vs0, vs0
; CHECK-P8-NEXT:    xxswapd vs1, vs1
; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: test4:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lxv v2, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
; CHECK-P9-NEXT:    xxlxor v4, v4, v4
; CHECK-P9-NEXT:    addi r4, r4, .LCPI1_0@toc@l
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI1_1@toc@ha
; CHECK-P9-NEXT:    addi r4, r4, .LCPI1_1@toc@l
; CHECK-P9-NEXT:    vperm v3, v4, v2, v3
; CHECK-P9-NEXT:    xvcvuxddp vs0, v3
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
; CHECK-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-P9-NEXT:    xvcvuxddp vs1, v2
; CHECK-P9-NEXT:    stxv vs1, 16(r3)
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: test4:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv v2, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_0@toc@ha
; CHECK-BE-NEXT:    xxlxor v4, v4, v4
; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_0@toc@l
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI1_1@toc@ha
; CHECK-BE-NEXT:    addi r4, r4, .LCPI1_1@toc@l
; CHECK-BE-NEXT:    vperm v3, v4, v2, v3
; CHECK-BE-NEXT:    xvcvuxddp vs0, v3
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    xvcvuxddp vs1, v2
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = load <4 x i16>, <4 x i16>* %SrcPtr, align 16
  %1 = uitofp <4 x i16> %0 to <4 x double>
  store <4 x double> %1, <4 x double>* %Sink, align 16
  ret void
}

; test2: load a <2 x i16> vector from %SrcPtr, convert both lanes to double
; with uitofp (unsigned conversion), and store the <2 x double> result to %Sink.
;
; Expected lowering, as pinned by the assertions below:
;   - pwr8 little-endian: a single vperm combines the source with a register
;     zeroed by xxlxor, using a mask loaded from the constant pool; the result
;     feeds xvcvuxddp. Memory accesses use lxvd2x/stxvd2x paired with xxswapd.
;   - pwr9 little-endian and pwr9 big-endian: the same vperm + xvcvuxddp
;     pattern, with lxv/stxv for the memory accesses and no xxswapd.
define void @test2(<2 x double>* nocapture %Sink, <2 x i16>* nocapture readonly %SrcPtr) {
; CHECK-P8-LABEL: test2:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    addis r5, r2, .LCPI2_0@toc@ha
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    xxlxor v4, v4, v4
; CHECK-P8-NEXT:    addi r5, r5, .LCPI2_0@toc@l
; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    xxswapd v3, vs1
; CHECK-P8-NEXT:    vperm v2, v4, v2, v3
; CHECK-P8-NEXT:    xvcvuxddp vs0, v2
; CHECK-P8-NEXT:    xxswapd vs0, vs0
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: test2:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lxv v2, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
; CHECK-P9-NEXT:    xxlxor v4, v4, v4
; CHECK-P9-NEXT:    addi r4, r4, .LCPI2_0@toc@l
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    vperm v2, v4, v2, v3
; CHECK-P9-NEXT:    xvcvuxddp vs0, v2
; CHECK-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: test2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv v2, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI2_0@toc@ha
; CHECK-BE-NEXT:    xxlxor v4, v4, v4
; CHECK-BE-NEXT:    addi r4, r4, .LCPI2_0@toc@l
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    vperm v2, v4, v2, v3
; CHECK-BE-NEXT:    xvcvuxddp vs0, v2
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = load <2 x i16>, <2 x i16>* %SrcPtr, align 16
  %1 = uitofp <2 x i16> %0 to <2 x double>
  store <2 x double> %1, <2 x double>* %Sink, align 16
  ret void
}

; stest8: load an <8 x i16> vector from %SrcPtr, convert every lane to double
; with sitofp (signed conversion), and store the <8 x double> result to %Sink.
;
; Expected lowering, as pinned by the assertions below:
;   - pwr8 little-endian: four vperm operations rearrange the source (both
;     vperm inputs are the source register), then each result is shifted left
;     (vsld) and arithmetically shifted right (vsrad) by a vector loaded from
;     the constant pool before xvcvsxddp. Memory accesses use lxvd2x/stxvd2x
;     paired with xxswapd.
;   - pwr9 little-endian and pwr9 big-endian: vperm followed by vextsh2d
;     (halfword-to-doubleword sign extension) and xvcvsxddp, with lxv/stxv for
;     the memory accesses.
define void @stest8(<8 x double>* nocapture %Sink, <8 x i16>* nocapture readonly %SrcPtr) {
; CHECK-P8-LABEL: stest8:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_0@toc@ha
; CHECK-P8-NEXT:    addis r6, r2, .LCPI3_2@toc@ha
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_0@toc@l
; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_3@toc@l
; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
; CHECK-P8-NEXT:    addi r5, r6, .LCPI3_2@toc@l
; CHECK-P8-NEXT:    lxvd2x vs3, 0, r4
; CHECK-P8-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
; CHECK-P8-NEXT:    lxvd2x vs2, 0, r5
; CHECK-P8-NEXT:    addis r5, r2, .LCPI3_4@toc@ha
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    addi r4, r4, .LCPI3_1@toc@l
; CHECK-P8-NEXT:    addi r5, r5, .LCPI3_4@toc@l
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    li r4, 48
; CHECK-P8-NEXT:    lxvd2x vs4, 0, r5
; CHECK-P8-NEXT:    xxswapd v3, vs1
; CHECK-P8-NEXT:    xxswapd v5, vs3
; CHECK-P8-NEXT:    li r5, 32
; CHECK-P8-NEXT:    xxswapd v4, vs2
; CHECK-P8-NEXT:    xxswapd v0, vs4
; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
; CHECK-P8-NEXT:    vperm v4, v2, v2, v4
; CHECK-P8-NEXT:    vperm v5, v2, v2, v5
; CHECK-P8-NEXT:    vperm v2, v2, v2, v0
; CHECK-P8-NEXT:    xxswapd v0, vs0
; CHECK-P8-NEXT:    vsld v3, v3, v0
; CHECK-P8-NEXT:    vsld v4, v4, v0
; CHECK-P8-NEXT:    vsld v5, v5, v0
; CHECK-P8-NEXT:    vsld v2, v2, v0
; CHECK-P8-NEXT:    vsrad v3, v3, v0
; CHECK-P8-NEXT:    vsrad v2, v2, v0
; CHECK-P8-NEXT:    vsrad v4, v4, v0
; CHECK-P8-NEXT:    vsrad v5, v5, v0
; CHECK-P8-NEXT:    xvcvsxddp vs2, v2
; CHECK-P8-NEXT:    xvcvsxddp vs0, v3
; CHECK-P8-NEXT:    xvcvsxddp vs1, v5
; CHECK-P8-NEXT:    xvcvsxddp vs3, v4
; CHECK-P8-NEXT:    xxswapd vs2, vs2
; CHECK-P8-NEXT:    xxswapd vs0, vs0
; CHECK-P8-NEXT:    xxswapd vs1, vs1
; CHECK-P8-NEXT:    xxswapd vs3, vs3
; CHECK-P8-NEXT:    stxvd2x vs2, r3, r4
; CHECK-P8-NEXT:    li r4, 16
; CHECK-P8-NEXT:    stxvd2x vs1, r3, r5
; CHECK-P8-NEXT:    stxvd2x vs3, r3, r4
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: stest8:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lxv v2, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_0@toc@l
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_1@toc@l
; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
; CHECK-P9-NEXT:    vextsh2d v3, v3
; CHECK-P9-NEXT:    xvcvsxddp vs0, v3
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_2@toc@l
; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
; CHECK-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-P9-NEXT:    vextsh2d v3, v3
; CHECK-P9-NEXT:    xvcvsxddp vs1, v3
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
; CHECK-P9-NEXT:    addi r4, r4, .LCPI3_3@toc@l
; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
; CHECK-P9-NEXT:    stxv vs1, 16(r3)
; CHECK-P9-NEXT:    vextsh2d v3, v3
; CHECK-P9-NEXT:    xvcvsxddp vs2, v3
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
; CHECK-P9-NEXT:    stxv vs2, 32(r3)
; CHECK-P9-NEXT:    vextsh2d v2, v2
; CHECK-P9-NEXT:    xvcvsxddp vs3, v2
; CHECK-P9-NEXT:    stxv vs3, 48(r3)
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: stest8:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv v2, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_0@toc@ha
; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_0@toc@l
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_1@toc@ha
; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_1@toc@l
; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
; CHECK-BE-NEXT:    vextsh2d v3, v3
; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_2@toc@ha
; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_2@toc@l
; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    vextsh2d v3, v3
; CHECK-BE-NEXT:    xvcvsxddp vs1, v3
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI3_3@toc@ha
; CHECK-BE-NEXT:    addi r4, r4, .LCPI3_3@toc@l
; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    vextsh2d v3, v3
; CHECK-BE-NEXT:    xvcvsxddp vs2, v3
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-BE-NEXT:    stxv vs2, 32(r3)
; CHECK-BE-NEXT:    vextsh2d v2, v2
; CHECK-BE-NEXT:    xvcvsxddp vs3, v2
; CHECK-BE-NEXT:    stxv vs3, 48(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = load <8 x i16>, <8 x i16>* %SrcPtr, align 16
  %1 = sitofp <8 x i16> %0 to <8 x double>
  store <8 x double> %1, <8 x double>* %Sink, align 16
  ret void
}

; stest4: load a <4 x i16> vector from %SrcPtr, convert every lane to double
; with sitofp (signed conversion), and store the <4 x double> result to %Sink.
;
; Expected lowering, as pinned by the assertions below:
;   - pwr8 little-endian: two vperm operations rearrange the source (both
;     vperm inputs are the source register), then each result is shifted left
;     (vsld) and arithmetically shifted right (vsrad) by a vector loaded from
;     the constant pool before xvcvsxddp. Memory accesses use lxvd2x/stxvd2x
;     paired with xxswapd.
;   - pwr9 little-endian and pwr9 big-endian: vperm followed by vextsh2d
;     (halfword-to-doubleword sign extension) and xvcvsxddp, with lxv/stxv for
;     the memory accesses.
define void @stest4(<4 x double>* nocapture %Sink, <4 x i16>* nocapture readonly %SrcPtr) {
; CHECK-P8-LABEL: stest4:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    addis r5, r2, .LCPI4_0@toc@ha
; CHECK-P8-NEXT:    addis r6, r2, .LCPI4_2@toc@ha
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    addi r5, r5, .LCPI4_0@toc@l
; CHECK-P8-NEXT:    addi r4, r6, .LCPI4_2@toc@l
; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
; CHECK-P8-NEXT:    lxvd2x vs2, 0, r4
; CHECK-P8-NEXT:    addis r4, r2, .LCPI4_1@toc@ha
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    addi r4, r4, .LCPI4_1@toc@l
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    li r4, 16
; CHECK-P8-NEXT:    xxswapd v3, vs1
; CHECK-P8-NEXT:    xxswapd v4, vs2
; CHECK-P8-NEXT:    vperm v3, v2, v2, v3
; CHECK-P8-NEXT:    vperm v2, v2, v2, v4
; CHECK-P8-NEXT:    xxswapd v4, vs0
; CHECK-P8-NEXT:    vsld v3, v3, v4
; CHECK-P8-NEXT:    vsld v2, v2, v4
; CHECK-P8-NEXT:    vsrad v3, v3, v4
; CHECK-P8-NEXT:    vsrad v2, v2, v4
; CHECK-P8-NEXT:    xvcvsxddp vs0, v3
; CHECK-P8-NEXT:    xvcvsxddp vs1, v2
; CHECK-P8-NEXT:    xxswapd vs0, vs0
; CHECK-P8-NEXT:    xxswapd vs1, vs1
; CHECK-P8-NEXT:    stxvd2x vs1, r3, r4
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: stest4:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lxv v2, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
; CHECK-P9-NEXT:    addi r4, r4, .LCPI4_0@toc@l
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI4_1@toc@ha
; CHECK-P9-NEXT:    addi r4, r4, .LCPI4_1@toc@l
; CHECK-P9-NEXT:    vperm v3, v2, v2, v3
; CHECK-P9-NEXT:    vextsh2d v3, v3
; CHECK-P9-NEXT:    xvcvsxddp vs0, v3
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
; CHECK-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-P9-NEXT:    vextsh2d v2, v2
; CHECK-P9-NEXT:    xvcvsxddp vs1, v2
; CHECK-P9-NEXT:    stxv vs1, 16(r3)
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: stest4:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv v2, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI4_0@toc@ha
; CHECK-BE-NEXT:    addi r4, r4, .LCPI4_0@toc@l
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI4_1@toc@ha
; CHECK-BE-NEXT:    addi r4, r4, .LCPI4_1@toc@l
; CHECK-BE-NEXT:    vperm v3, v2, v2, v3
; CHECK-BE-NEXT:    vextsh2d v3, v3
; CHECK-BE-NEXT:    xvcvsxddp vs0, v3
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    vextsh2d v2, v2
; CHECK-BE-NEXT:    xvcvsxddp vs1, v2
; CHECK-BE-NEXT:    stxv vs1, 16(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = load <4 x i16>, <4 x i16>* %SrcPtr, align 16
  %1 = sitofp <4 x i16> %0 to <4 x double>
  store <4 x double> %1, <4 x double>* %Sink, align 16
  ret void
}

; stest2: load a <2 x i16> vector from %SrcPtr, convert both lanes to double
; with sitofp (signed conversion), and store the <2 x double> result to %Sink.
;
; Expected lowering, as pinned by the assertions below:
;   - pwr8 little-endian: a single vperm rearranges the source (both vperm
;     inputs are the source register), then the result is shifted left (vsld)
;     and arithmetically shifted right (vsrad) by a vector loaded from the
;     constant pool before xvcvsxddp. Memory accesses use lxvd2x/stxvd2x
;     paired with xxswapd.
;   - pwr9 little-endian and pwr9 big-endian: vperm followed by vextsh2d
;     (halfword-to-doubleword sign extension) and xvcvsxddp, with lxv/stxv for
;     the memory accesses.
define void @stest2(<2 x double>* nocapture %Sink, <2 x i16>* nocapture readonly %SrcPtr) {
; CHECK-P8-LABEL: stest2:
; CHECK-P8:       # %bb.0: # %entry
; CHECK-P8-NEXT:    addis r5, r2, .LCPI5_0@toc@ha
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    addis r4, r2, .LCPI5_1@toc@ha
; CHECK-P8-NEXT:    addi r5, r5, .LCPI5_0@toc@l
; CHECK-P8-NEXT:    addi r4, r4, .LCPI5_1@toc@l
; CHECK-P8-NEXT:    lxvd2x vs1, 0, r5
; CHECK-P8-NEXT:    xxswapd v2, vs0
; CHECK-P8-NEXT:    lxvd2x vs0, 0, r4
; CHECK-P8-NEXT:    xxswapd v3, vs1
; CHECK-P8-NEXT:    vperm v2, v2, v2, v3
; CHECK-P8-NEXT:    xxswapd v3, vs0
; CHECK-P8-NEXT:    vsld v2, v2, v3
; CHECK-P8-NEXT:    vsrad v2, v2, v3
; CHECK-P8-NEXT:    xvcvsxddp vs0, v2
; CHECK-P8-NEXT:    xxswapd vs0, vs0
; CHECK-P8-NEXT:    stxvd2x vs0, 0, r3
; CHECK-P8-NEXT:    blr
;
; CHECK-P9-LABEL: stest2:
; CHECK-P9:       # %bb.0: # %entry
; CHECK-P9-NEXT:    lxv v2, 0(r4)
; CHECK-P9-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
; CHECK-P9-NEXT:    addi r4, r4, .LCPI5_0@toc@l
; CHECK-P9-NEXT:    lxv v3, 0(r4)
; CHECK-P9-NEXT:    vperm v2, v2, v2, v3
; CHECK-P9-NEXT:    vextsh2d v2, v2
; CHECK-P9-NEXT:    xvcvsxddp vs0, v2
; CHECK-P9-NEXT:    stxv vs0, 0(r3)
; CHECK-P9-NEXT:    blr
;
; CHECK-BE-LABEL: stest2:
; CHECK-BE:       # %bb.0: # %entry
; CHECK-BE-NEXT:    lxv v2, 0(r4)
; CHECK-BE-NEXT:    addis r4, r2, .LCPI5_0@toc@ha
; CHECK-BE-NEXT:    addi r4, r4, .LCPI5_0@toc@l
; CHECK-BE-NEXT:    lxv v3, 0(r4)
; CHECK-BE-NEXT:    vperm v2, v2, v2, v3
; CHECK-BE-NEXT:    vextsh2d v2, v2
; CHECK-BE-NEXT:    xvcvsxddp vs0, v2
; CHECK-BE-NEXT:    stxv vs0, 0(r3)
; CHECK-BE-NEXT:    blr
entry:
  %0 = load <2 x i16>, <2 x i16>* %SrcPtr, align 16
  %1 = sitofp <2 x i16> %0 to <2 x double>
  store <2 x double> %1, <2 x double>* %Sink, align 16
  ret void
}
