1; Verifies correctness of load/store of parameters and return values.
2; RUN: llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | FileCheck -allow-deprecated-dag-overlap %s
3; RUN: %if ptxas %{ llc < %s -march=nvptx64 -mcpu=sm_35 -O0 -verify-machineinstrs | %ptxas-verify -arch=sm_35 %}
4
; Single-member wrapper structs, one for each scalar element type.
%s_i1  = type { i1 }
%s_i8  = type { i8 }
%s_i16 = type { i16 }
%s_f16 = type { half }
%s_i32 = type { i32 }
%s_f32 = type { float }
%s_i64 = type { i64 }
%s_f64 = type { double }
13
; More complex aggregate types. An i64 member is used to increase the
; natural alignment requirement of the type.
; %s_i8i32x4p has the same member list as %s_i8i32x4 but is a packed struct.
; %s_crossfield combines a scalar, an array, a vector, and an array of structs.
%s_i32x4      = type { i32, i32, i32, i32, i64 }
%s_i32f32     = type { i32, float, i32, float, i64 }
%s_i8i32x4    = type { i32, i32, i8, i32, i32, i64 }
%s_i8i32x4p   = type <{ i32, i32, i8, i32, i32, i64 }>
%s_crossfield = type { i32, [2 x i32], <4 x i32>, [3 x { i32, i32, i32 }] }
21; All scalar parameters must be at least 32 bits in size.
22; i1 is loaded/stored as i8.
23
24; CHECK: .func  (.param .b32 func_retval0)
25; CHECK-LABEL: test_i1(
26; CHECK-NEXT: .param .b32 test_i1_param_0
27; CHECK:      ld.param.u8 [[A8:%rs[0-9]+]], [test_i1_param_0];
28; CHECK:      and.b16 [[A:%rs[0-9]+]], [[A8]], 1;
29; CHECK:      setp.eq.b16 %p1, [[A]], 1
30; CHECK:      cvt.u32.u16 [[B:%r[0-9]+]], [[A8]]
31; CHECK:      and.b32 [[C:%r[0-9]+]], [[B]], 1;
32; CHECK:      .param .b32 param0;
33; CHECK:      st.param.b32    [param0+0], [[C]]
34; CHECK:      .param .b32 retval0;
35; CHECK:      call.uni
36; CHECK-NEXT: test_i1,
37; CHECK:      ld.param.b32    [[R8:%r[0-9]+]], [retval0+0];
38; CHECK:      and.b32         [[R:%r[0-9]+]], [[R8]], 1;
39; CHECK:      st.param.b32    [func_retval0+0], [[R]];
40; CHECK:      ret;
define i1 @test_i1(i1 %a) {
  ; Pass the i1 argument to a recursive self-call and return the call's result.
  %res = tail call i1 @test_i1(i1 %a)
  ret i1 %res
}
45
; Signed i1 is a somewhat special case. We only care about one bit and
; then use neg.s32 to convert it to 32-bit -1 if it's set.
48; CHECK: .func  (.param .b32 func_retval0)
49; CHECK-LABEL: test_i1s(
50; CHECK-NEXT: .param .b32 test_i1s_param_0
51; CHECK:      ld.param.u8 [[A8:%rs[0-9]+]], [test_i1s_param_0];
52; CHECK:      cvt.u32.u16     [[A32:%r[0-9]+]], [[A8]];
53; CHECK:      and.b32         [[A1:%r[0-9]+]], [[A32]], 1;
54; CHECK:      neg.s32         [[A:%r[0-9]+]], [[A1]];
55; CHECK:      .param .b32 param0;
56; CHECK:      st.param.b32    [param0+0], [[A]];
57; CHECK:      .param .b32 retval0;
58; CHECK:      call.uni
59; CHECK:      ld.param.b32    [[R8:%r[0-9]+]], [retval0+0];
60; CHECK:      and.b32         [[R1:%r[0-9]+]], [[R8]], 1;
61; CHECK:      neg.s32         [[R:%r[0-9]+]], [[R1]];
62; CHECK:      st.param.b32    [func_retval0+0], [[R]];
63; CHECK-NEXT: ret;
define signext i1 @test_i1s(i1 signext %a) {
  ; Pass the signext i1 argument to a recursive signext self-call and return
  ; the call's result.
  %res = tail call signext i1 @test_i1s(i1 signext %a)
  ret i1 %res
}
68
69; Make sure that i1 loads are vectorized as i8 loads, respecting each element alignment.
70; CHECK: .func  (.param .align 1 .b8 func_retval0[1])
71; CHECK-LABEL: test_v3i1(
72; CHECK-NEXT: .param .align 1 .b8 test_v3i1_param_0[1]
73; CHECK-DAG:  ld.param.u8     [[E2:%rs[0-9]+]], [test_v3i1_param_0+2];
74; CHECK-DAG:  ld.param.u8     [[E0:%rs[0-9]+]], [test_v3i1_param_0]
75; CHECK:      .param .align 1 .b8 param0[1];
76; CHECK-DAG:  st.param.b8     [param0+0], [[E0]];
77; CHECK-DAG:  st.param.b8     [param0+2], [[E2]];
78; CHECK:      .param .align 1 .b8 retval0[1];
79; CHECK:      call.uni (retval0),
80; CHECK-NEXT: test_v3i1,
81; CHECK-DAG:  ld.param.b8     [[RE0:%rs[0-9]+]], [retval0+0];
82; CHECK-DAG:  ld.param.b8     [[RE2:%rs[0-9]+]], [retval0+2];
83; CHECK-DAG:  st.param.b8     [func_retval0+0], [[RE0]]
84; CHECK-DAG:  st.param.b8     [func_retval0+2], [[RE2]];
85; CHECK-NEXT: ret;
define <3 x i1> @test_v3i1(<3 x i1> %a) {
  ; Pass the <3 x i1> argument to a recursive self-call and return its result.
  %res = tail call <3 x i1> @test_v3i1(<3 x i1> %a)
  ret <3 x i1> %res
}
90
91; CHECK: .func  (.param .align 1 .b8 func_retval0[1])
92; CHECK-LABEL: test_v4i1(
93; CHECK-NEXT: .param .align 1 .b8 test_v4i1_param_0[1]
94; CHECK:      ld.param.u8 [[E0:%rs[0-9]+]], [test_v4i1_param_0]
95; CHECK:      .param .align 1 .b8 param0[1];
96; CHECK:      st.param.b8  [param0+0], [[E0]];
97; CHECK:      .param .align 1 .b8 retval0[1];
98; CHECK:      call.uni (retval0),
99; CHECK:      test_v4i1,
100; CHECK:      ld.param.b8  [[RE0:%rs[0-9]+]], [retval0+0];
101; CHECK:      ld.param.b8  [[RE1:%rs[0-9]+]], [retval0+1];
102; CHECK:      ld.param.b8  [[RE2:%rs[0-9]+]], [retval0+2];
103; CHECK:      ld.param.b8  [[RE3:%rs[0-9]+]], [retval0+3];
104; CHECK:      st.param.b8  [func_retval0+0], [[RE0]];
105; CHECK:      st.param.b8  [func_retval0+1], [[RE1]];
106; CHECK:      st.param.b8  [func_retval0+2], [[RE2]];
107; CHECK:      st.param.b8  [func_retval0+3], [[RE3]];
108; CHECK-NEXT: ret;
define <4 x i1> @test_v4i1(<4 x i1> %a) {
  ; Pass the <4 x i1> argument to a recursive self-call and return its result.
  %res = tail call <4 x i1> @test_v4i1(<4 x i1> %a)
  ret <4 x i1> %res
}
113
114; CHECK: .func  (.param .align 1 .b8 func_retval0[1])
115; CHECK-LABEL: test_v5i1(
116; CHECK-NEXT: .param .align 1 .b8 test_v5i1_param_0[1]
117; CHECK-DAG:  ld.param.u8     [[E4:%rs[0-9]+]], [test_v5i1_param_0+4];
118; CHECK-DAG:  ld.param.u8     [[E0:%rs[0-9]+]], [test_v5i1_param_0]
119; CHECK:      .param .align 1 .b8 param0[1];
120; CHECK-DAG:  st.param.b8     [param0+0], [[E0]];
121; CHECK-DAG:  st.param.b8     [param0+4], [[E4]];
122; CHECK:      .param .align 1 .b8 retval0[1];
123; CHECK:      call.uni (retval0),
124; CHECK-NEXT: test_v5i1,
125; CHECK-DAG:  ld.param.b8  [[RE0:%rs[0-9]+]], [retval0+0];
126; CHECK-DAG:  ld.param.b8     [[RE4:%rs[0-9]+]], [retval0+4];
127; CHECK-DAG:  st.param.b8  [func_retval0+0], [[RE0]]
128; CHECK-DAG:  st.param.b8     [func_retval0+4], [[RE4]];
129; CHECK-NEXT: ret;
define <5 x i1> @test_v5i1(<5 x i1> %a) {
  ; Pass the <5 x i1> argument to a recursive self-call and return its result.
  %res = tail call <5 x i1> @test_v5i1(<5 x i1> %a)
  ret <5 x i1> %res
}
134
135; CHECK: .func  (.param .b32 func_retval0)
136; CHECK-LABEL: test_i2(
137; CHECK-NEXT: .param .b32 test_i2_param_0
138; CHECK:      ld.param.u8 {{%rs[0-9]+}}, [test_i2_param_0];
139; CHECK:      .param .b32 param0;
140; CHECK:      st.param.b32    [param0+0], {{%r[0-9]+}};
141; CHECK:      .param .b32 retval0;
142; CHECK:      call.uni (retval0),
143; CHECK:      test_i2,
144; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0+0];
145; CHECK:      st.param.b32    [func_retval0+0], {{%r[0-9]+}};
146; CHECK-NEXT: ret;
define i2 @test_i2(i2 %a) {
  ; Pass the i2 argument to a recursive self-call and return the call's result.
  %res = tail call i2 @test_i2(i2 %a)
  ret i2 %res
}
151
152; CHECK: .func  (.param .b32 func_retval0)
153; CHECK-LABEL: test_i3(
154; CHECK-NEXT: .param .b32 test_i3_param_0
155; CHECK:      ld.param.u8 {{%rs[0-9]+}}, [test_i3_param_0];
156; CHECK:      .param .b32 param0;
157; CHECK:      st.param.b32    [param0+0], {{%r[0-9]+}};
158; CHECK:      .param .b32 retval0;
159; CHECK:      call.uni (retval0),
160; CHECK:      test_i3,
161; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0+0];
162; CHECK:      st.param.b32    [func_retval0+0], {{%r[0-9]+}};
163; CHECK-NEXT: ret;
define i3 @test_i3(i3 %a) {
  ; Pass the i3 argument to a recursive self-call and return the call's result.
  %res = tail call i3 @test_i3(i3 %a)
  ret i3 %res
}
168
; Unsigned i8 is loaded into a 16-bit register, zero-extended to 32 bits, and masked to its low 8 bits.
170; CHECK: .func  (.param .b32 func_retval0)
171; CHECK-LABEL: test_i8(
172; CHECK-NEXT: .param .b32 test_i8_param_0
173; CHECK:      ld.param.u8 [[A8:%rs[0-9]+]], [test_i8_param_0];
174; CHECK:      cvt.u32.u16     [[A32:%r[0-9]+]], [[A8]];
175; CHECK:      and.b32         [[A:%r[0-9]+]], [[A32]], 255;
176; CHECK:      .param .b32 param0;
177; CHECK:      st.param.b32    [param0+0], [[A]];
178; CHECK:      .param .b32 retval0;
179; CHECK:      call.uni (retval0),
180; CHECK:      test_i8,
181; CHECK:      ld.param.b32    [[R32:%r[0-9]+]], [retval0+0];
182; CHECK:      and.b32         [[R:%r[0-9]+]], [[R32]], 255;
183; CHECK:      st.param.b32    [func_retval0+0], [[R]];
184; CHECK-NEXT: ret;
define i8 @test_i8(i8 %a) {
  ; Pass the i8 argument to a recursive self-call and return the call's result.
  %res = tail call i8 @test_i8(i8 %a)
  ret i8 %res
}
189
; Signed i8 is loaded into a 16-bit register, which is then sign-extended to i32.
191; CHECK: .func  (.param .b32 func_retval0)
192; CHECK-LABEL: test_i8s(
193; CHECK-NEXT: .param .b32 test_i8s_param_0
194; CHECK:      ld.param.s8 [[A8:%rs[0-9]+]], [test_i8s_param_0];
195; CHECK:      cvt.s32.s16     [[A:%r[0-9]+]], [[A8]];
196; CHECK:      .param .b32 param0;
197; CHECK:      st.param.b32    [param0+0], [[A]];
198; CHECK:      .param .b32 retval0;
199; CHECK:      call.uni (retval0),
200; CHECK:      test_i8s,
201; CHECK:      ld.param.b32    [[R32:%r[0-9]+]], [retval0+0];
202; -- This is suspicious (though correct) -- why not cvt.u8.u32, cvt.s8.s32 ?
203; CHECK:      cvt.u16.u32     [[R16:%rs[0-9]+]], [[R32]];
204; CHECK:      cvt.s32.s16     [[R:%r[0-9]+]], [[R16]];
205; CHECK:      st.param.b32    [func_retval0+0], [[R]];
206; CHECK-NEXT: ret;
define signext i8 @test_i8s(i8 signext %a) {
  ; Pass the signext i8 argument to a recursive signext self-call and return
  ; the call's result.
  %res = tail call signext i8 @test_i8s(i8 signext %a)
  ret i8 %res
}
211
212; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
213; CHECK-LABEL: test_v3i8(
214; CHECK-NEXT: .param .align 4 .b8 test_v3i8_param_0[4]
215; CHECK-DAG:  ld.param.u8     [[E2:%rs[0-9]+]], [test_v3i8_param_0+2];
216; CHECK-DAG:  ld.param.v2.u8  {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i8_param_0];
217; CHECK:      .param .align 4 .b8 param0[4];
218; CHECK:      st.param.v2.b8  [param0+0], {[[E0]], [[E1]]};
219; CHECK:      st.param.b8     [param0+2], [[E2]];
220; CHECK:      .param .align 4 .b8 retval0[4];
221; CHECK:      call.uni (retval0),
222; CHECK-NEXT: test_v3i8,
223; CHECK-DAG:  ld.param.v2.b8  {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
224; CHECK-DAG:  ld.param.b8     [[RE2:%rs[0-9]+]], [retval0+2];
225; CHECK-DAG:  st.param.v2.b8  [func_retval0+0], {[[RE0]], [[RE1]]};
226; CHECK-DAG:  st.param.b8     [func_retval0+2], [[RE2]];
227; CHECK-NEXT: ret;
define <3 x i8> @test_v3i8(<3 x i8> %a) {
  ; Pass the <3 x i8> argument to a recursive self-call and return its result.
  %res = tail call <3 x i8> @test_v3i8(<3 x i8> %a)
  ret <3 x i8> %res
}
232
233; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
234; CHECK-LABEL: test_v4i8(
235; CHECK-NEXT: .param .align 4 .b8 test_v4i8_param_0[4]
236; CHECK:      ld.param.v4.u8 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i8_param_0]
237; CHECK:      .param .align 4 .b8 param0[4];
238; CHECK:      st.param.v4.b8  [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
239; CHECK:      .param .align 4 .b8 retval0[4];
240; CHECK:      call.uni (retval0),
241; CHECK-NEXT: test_v4i8,
242; CHECK:      ld.param.v4.b8  {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
243; CHECK:      st.param.v4.b8 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
244; CHECK-NEXT: ret;
define <4 x i8> @test_v4i8(<4 x i8> %a) {
  ; Pass the <4 x i8> argument to a recursive self-call and return its result.
  %res = tail call <4 x i8> @test_v4i8(<4 x i8> %a)
  ret <4 x i8> %res
}
249
250; CHECK: .func  (.param .align 8 .b8 func_retval0[8])
251; CHECK-LABEL: test_v5i8(
252; CHECK-NEXT: .param .align 8 .b8 test_v5i8_param_0[8]
253; CHECK-DAG:  ld.param.u8     [[E4:%rs[0-9]+]], [test_v5i8_param_0+4];
254; CHECK-DAG:  ld.param.v4.u8  {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i8_param_0]
255; CHECK:      .param .align 8 .b8 param0[8];
256; CHECK-DAG:  st.param.v4.b8  [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
257; CHECK-DAG:  st.param.b8     [param0+4], [[E4]];
258; CHECK:      .param .align 8 .b8 retval0[8];
259; CHECK:      call.uni (retval0),
260; CHECK-NEXT: test_v5i8,
261; CHECK-DAG:  ld.param.v4.b8  {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
262; CHECK-DAG:  ld.param.b8     [[RE4:%rs[0-9]+]], [retval0+4];
263; CHECK-DAG:  st.param.v4.b8  [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
264; CHECK-DAG:  st.param.b8     [func_retval0+4], [[RE4]];
265; CHECK-NEXT: ret;
define <5 x i8> @test_v5i8(<5 x i8> %a) {
  ; Pass the <5 x i8> argument to a recursive self-call and return its result.
  %res = tail call <5 x i8> @test_v5i8(<5 x i8> %a)
  ret <5 x i8> %res
}
270
271; CHECK: .func  (.param .b32 func_retval0)
272; CHECK-LABEL: test_i11(
273; CHECK-NEXT: .param .b32 test_i11_param_0
274; CHECK:      ld.param.u16    {{%rs[0-9]+}}, [test_i11_param_0];
275; CHECK:      st.param.b32    [param0+0], {{%r[0-9]+}};
276; CHECK:      .param .b32 retval0;
277; CHECK:      call.uni (retval0),
278; CHECK-NEXT: test_i11,
279; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0+0];
280; CHECK:      st.param.b32    [func_retval0+0], {{%r[0-9]+}};
281; CHECK-NEXT: ret;
define i11 @test_i11(i11 %a) {
  ; Pass the i11 argument to a recursive self-call and return the call's result.
  %res = tail call i11 @test_i11(i11 %a)
  ret i11 %res
}
286
287; CHECK: .func  (.param .b32 func_retval0)
288; CHECK-LABEL: test_i16(
289; CHECK-NEXT: .param .b32 test_i16_param_0
290; CHECK:      ld.param.u16    [[E16:%rs[0-9]+]], [test_i16_param_0];
291; CHECK:      cvt.u32.u16     [[E32:%r[0-9]+]], [[E16]];
292; CHECK:      .param .b32 param0;
293; CHECK:      st.param.b32    [param0+0], [[E32]];
294; CHECK:      .param .b32 retval0;
295; CHECK:      call.uni (retval0),
296; CHECK-NEXT: test_i16,
297; CHECK:      ld.param.b32    [[RE32:%r[0-9]+]], [retval0+0];
298; CHECK:      and.b32         [[R:%r[0-9]+]], [[RE32]], 65535;
299; CHECK:      st.param.b32    [func_retval0+0], [[R]];
300; CHECK-NEXT: ret;
define i16 @test_i16(i16 %a) {
  ; Pass the i16 argument to a recursive self-call and return the call's result.
  %res = tail call i16 @test_i16(i16 %a)
  ret i16 %res
}
305
306; CHECK: .func  (.param .b32 func_retval0)
307; CHECK-LABEL: test_i16s(
308; CHECK-NEXT: .param .b32 test_i16s_param_0
309; CHECK:      ld.param.u16    [[E16:%rs[0-9]+]], [test_i16s_param_0];
310; CHECK:      cvt.s32.s16     [[E32:%r[0-9]+]], [[E16]];
311; CHECK:      .param .b32 param0;
312; CHECK:      st.param.b32    [param0+0], [[E32]];
313; CHECK:      .param .b32 retval0;
314; CHECK:      call.uni (retval0),
315; CHECK-NEXT: test_i16s,
316; CHECK:      ld.param.b32    [[RE32:%r[0-9]+]], [retval0+0];
317; CHECK:      cvt.s32.s16     [[R:%r[0-9]+]], [[RE32]];
318; CHECK:      st.param.b32    [func_retval0+0], [[R]];
319; CHECK-NEXT: ret;
define signext i16 @test_i16s(i16 signext %a) {
  ; Pass the signext i16 argument to a recursive signext self-call and return
  ; the call's result.
  %res = tail call signext i16 @test_i16s(i16 signext %a)
  ret i16 %res
}
324
325; CHECK: .func  (.param .align 8 .b8 func_retval0[8])
326; CHECK-LABEL: test_v3i16(
327; CHECK-NEXT: .param .align 8 .b8 test_v3i16_param_0[8]
328; CHECK-DAG:  ld.param.u16    [[E2:%rs[0-9]+]], [test_v3i16_param_0+4];
329; CHECK-DAG:  ld.param.v2.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]]}, [test_v3i16_param_0];
330; CHECK:      .param .align 8 .b8 param0[8];
331; CHECK:      st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
332; CHECK:      st.param.b16    [param0+4], [[E2]];
333; CHECK:      .param .align 8 .b8 retval0[8];
334; CHECK:      call.uni (retval0),
335; CHECK-NEXT: test_v3i16,
336; CHECK:      ld.param.v2.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]]}, [retval0+0];
337; CHECK:      ld.param.b16    [[RE2:%rs[0-9]+]], [retval0+4];
338; CHECK-DAG:  st.param.v2.b16 [func_retval0+0], {[[RE0]], [[RE1]]};
339; CHECK-DAG:  st.param.b16    [func_retval0+4], [[RE2]];
340; CHECK-NEXT: ret;
define <3 x i16> @test_v3i16(<3 x i16> %a) {
  ; Pass the <3 x i16> argument to a recursive self-call and return its result.
  %res = tail call <3 x i16> @test_v3i16(<3 x i16> %a)
  ret <3 x i16> %res
}
345
346; CHECK: .func  (.param .align 8 .b8 func_retval0[8])
347; CHECK-LABEL: test_v4i16(
348; CHECK-NEXT: .param .align 8 .b8 test_v4i16_param_0[8]
349; CHECK:      ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v4i16_param_0]
350; CHECK:      .param .align 8 .b8 param0[8];
351; CHECK:      st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
352; CHECK:      .param .align 8 .b8 retval0[8];
353; CHECK:      call.uni (retval0),
354; CHECK-NEXT: test_v4i16,
355; CHECK:      ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
356; CHECK:      st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
357; CHECK-NEXT: ret;
define <4 x i16> @test_v4i16(<4 x i16> %a) {
  ; Pass the <4 x i16> argument to a recursive self-call and return its result.
  %res = tail call <4 x i16> @test_v4i16(<4 x i16> %a)
  ret <4 x i16> %res
}
362
363; CHECK: .func  (.param .align 16 .b8 func_retval0[16])
364; CHECK-LABEL: test_v5i16(
365; CHECK-NEXT: .param .align 16 .b8 test_v5i16_param_0[16]
366; CHECK-DAG:  ld.param.u16    [[E4:%rs[0-9]+]], [test_v5i16_param_0+8];
367; CHECK-DAG:  ld.param.v4.u16 {[[E0:%rs[0-9]+]], [[E1:%rs[0-9]+]], [[E2:%rs[0-9]+]], [[E3:%rs[0-9]+]]}, [test_v5i16_param_0]
368; CHECK:      .param .align 16 .b8 param0[16];
369; CHECK-DAG:  st.param.v4.b16 [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
370; CHECK-DAG:  st.param.b16    [param0+8], [[E4]];
371; CHECK:      .param .align 16 .b8 retval0[16];
372; CHECK:      call.uni (retval0),
373; CHECK-NEXT: test_v5i16,
374; CHECK-DAG:  ld.param.v4.b16 {[[RE0:%rs[0-9]+]], [[RE1:%rs[0-9]+]], [[RE2:%rs[0-9]+]], [[RE3:%rs[0-9]+]]}, [retval0+0];
375; CHECK-DAG:  ld.param.b16    [[RE4:%rs[0-9]+]], [retval0+8];
376; CHECK-DAG:  st.param.v4.b16 [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
377; CHECK-DAG:  st.param.b16    [func_retval0+8], [[RE4]];
378; CHECK-NEXT: ret;
define <5 x i16> @test_v5i16(<5 x i16> %a) {
  ; Pass the <5 x i16> argument to a recursive self-call and return its result.
  %res = tail call <5 x i16> @test_v5i16(<5 x i16> %a)
  ret <5 x i16> %res
}
383
384; CHECK: .func  (.param .b32 func_retval0)
385; CHECK-LABEL: test_f16(
386; CHECK-NEXT: .param .b32 test_f16_param_0
387; CHECK:      ld.param.b16    [[E:%h[0-9]+]], [test_f16_param_0];
388; CHECK:      .param .b32 param0;
389; CHECK:      st.param.b16    [param0+0], [[E]];
390; CHECK:      .param .b32 retval0;
391; CHECK:      call.uni (retval0),
392; CHECK-NEXT: test_f16,
393; CHECK:      ld.param.b16    [[R:%h[0-9]+]], [retval0+0];
394; CHECK:      st.param.b16    [func_retval0+0], [[R]]
395; CHECK-NEXT: ret;
define half @test_f16(half %a) {
  ; Pass the half argument to a recursive self-call and return the call's result.
  %res = tail call half @test_f16(half %a)
  ret half %res
}
400
401; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
402; CHECK-LABEL: test_v2f16(
403; CHECK-NEXT: .param .align 4 .b8 test_v2f16_param_0[4]
404; CHECK:      ld.param.b32    [[E:%hh[0-9]+]], [test_v2f16_param_0];
405; CHECK:      .param .align 4 .b8 param0[4];
406; CHECK:      st.param.b32    [param0+0], [[E]];
407; CHECK:      .param .align 4 .b8 retval0[4];
408; CHECK:      call.uni (retval0),
409; CHECK-NEXT: test_v2f16,
410; CHECK:      ld.param.b32    [[R:%hh[0-9]+]], [retval0+0];
411; CHECK:      st.param.b32    [func_retval0+0], [[R]]
412; CHECK-NEXT: ret;
define <2 x half> @test_v2f16(<2 x half> %a) {
  ; Pass the <2 x half> argument to a recursive self-call and return its result.
  %res = tail call <2 x half> @test_v2f16(<2 x half> %a)
  ret <2 x half> %res
}
417
418; CHECK:.func  (.param .align 8 .b8 func_retval0[8])
419; CHECK-LABEL: test_v3f16(
420; CHECK:      .param .align 8 .b8 test_v3f16_param_0[8]
421; CHECK-DAG:  ld.param.b32    [[HH01:%hh[0-9]+]], [test_v3f16_param_0];
422; CHECK-DAG:  mov.b32         {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
423; CHECK-DAG:  ld.param.b16    [[E2:%h[0-9]+]], [test_v3f16_param_0+4];
424; CHECK:      .param .align 8 .b8 param0[8];
425; CHECK-DAG:  st.param.v2.b16 [param0+0], {[[E0]], [[E1]]};
426; CHECK-DAG:  st.param.b16    [param0+4], [[E2]];
427; CHECK:      .param .align 8 .b8 retval0[8];
428; CHECK:      call.uni (retval0),
429; CHECK:      test_v3f16,
430; CHECK-DAG:  ld.param.v2.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]]}, [retval0+0];
431; CHECK-DAG:  ld.param.b16    [[R2:%h[0-9]+]], [retval0+4];
432; CHECK-DAG:  st.param.v2.b16 [func_retval0+0], {[[R0]], [[R1]]};
433; CHECK-DAG:  st.param.b16    [func_retval0+4], [[R2]];
434; CHECK:      ret;
define <3 x half> @test_v3f16(<3 x half> %a) {
  ; Pass the <3 x half> argument to a recursive self-call and return its result.
  %res = tail call <3 x half> @test_v3f16(<3 x half> %a)
  ret <3 x half> %res
}
439
440; CHECK:.func  (.param .align 8 .b8 func_retval0[8])
441; CHECK-LABEL: test_v4f16(
442; CHECK:      .param .align 8 .b8 test_v4f16_param_0[8]
443; CHECK:      ld.param.v2.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]]}, [test_v4f16_param_0];
444; CHECK-DAG:  mov.b32         [[HH01:%hh[0-9]+]], [[R01]];
445; CHECK-DAG:  mov.b32         [[HH23:%hh[0-9]+]], [[R23]];
446; CHECK:      .param .align 8 .b8 param0[8];
447; CHECK:      st.param.v2.b32 [param0+0], {[[HH01]], [[HH23]]};
448; CHECK:      .param .align 8 .b8 retval0[8];
449; CHECK:      call.uni (retval0),
450; CHECK:      test_v4f16,
451; CHECK:      ld.param.v2.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]]}, [retval0+0];
452; CHECK:      st.param.v2.b32 [func_retval0+0], {[[RH01]], [[RH23]]};
453; CHECK:      ret;
define <4 x half> @test_v4f16(<4 x half> %a) {
  ; Pass the <4 x half> argument to a recursive self-call and return its result.
  %res = tail call <4 x half> @test_v4f16(<4 x half> %a)
  ret <4 x half> %res
}
458
459; CHECK:.func  (.param .align 16 .b8 func_retval0[16])
460; CHECK-LABEL: test_v5f16(
461; CHECK:      .param .align 16 .b8 test_v5f16_param_0[16]
462; CHECK-DAG:  ld.param.v4.b16  {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v5f16_param_0];
463; CHECK-DAG:  mov.b32         {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]]}, [[HH01]];
464; CHECK-DAG:  ld.param.b16    [[E4:%h[0-9]+]], [test_v5f16_param_0+8];
465; CHECK:      .param .align 16 .b8 param0[16];
466; CHECK-DAG:  st.param.v4.b16 [param0+0],
467; CHECK-DAG:  st.param.b16    [param0+8], [[E4]];
468; CHECK:      .param .align 16 .b8 retval0[16];
469; CHECK:      call.uni (retval0),
470; CHECK:      test_v5f16,
471; CHECK-DAG:  ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
472; CHECK-DAG:  ld.param.b16    [[R4:%h[0-9]+]], [retval0+8];
473; CHECK-DAG:  st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
474; CHECK-DAG:  st.param.b16    [func_retval0+8], [[R4]];
475; CHECK:      ret;
define <5 x half> @test_v5f16(<5 x half> %a) {
  ; Pass the <5 x half> argument to a recursive self-call and return its result.
  ; NOTE(review): the mov.b32 FileCheck line above this function uses [[HH01]],
  ; which is only bound by earlier tests, and rebinds E0/E1 -- confirm intended.
  %res = tail call <5 x half> @test_v5f16(<5 x half> %a)
  ret <5 x half> %res
}
480
481; CHECK:.func  (.param .align 16 .b8 func_retval0[16])
482; CHECK-LABEL: test_v8f16(
483; CHECK:      .param .align 16 .b8 test_v8f16_param_0[16]
484; CHECK:      ld.param.v4.u32 {[[R01:%r[0-9]+]], [[R23:%r[0-9]+]], [[R45:%r[0-9]+]], [[R67:%r[0-9]+]]}, [test_v8f16_param_0];
485; CHECK-DAG:  mov.b32         [[HH01:%hh[0-9]+]], [[R01]];
486; CHECK-DAG:  mov.b32         [[HH23:%hh[0-9]+]], [[R23]];
487; CHECK-DAG:  mov.b32         [[HH45:%hh[0-9]+]], [[R45]];
488; CHECK-DAG:  mov.b32         [[HH67:%hh[0-9]+]], [[R67]];
489; CHECK:      .param .align 16 .b8 param0[16];
490; CHECK:      st.param.v4.b32 [param0+0], {[[HH01]], [[HH23]], [[HH45]], [[HH67]]};
491; CHECK:      .param .align 16 .b8 retval0[16];
492; CHECK:      call.uni (retval0),
493; CHECK:      test_v8f16,
494; CHECK:      ld.param.v4.b32 {[[RH01:%hh[0-9]+]], [[RH23:%hh[0-9]+]], [[RH45:%hh[0-9]+]], [[RH67:%hh[0-9]+]]}, [retval0+0];
495; CHECK:      st.param.v4.b32 [func_retval0+0], {[[RH01]], [[RH23]], [[RH45]], [[RH67]]};
496; CHECK:      ret;
define <8 x half> @test_v8f16(<8 x half> %a) {
  ; Pass the <8 x half> argument to a recursive self-call and return its result.
  %res = tail call <8 x half> @test_v8f16(<8 x half> %a)
  ret <8 x half> %res
}
501
502; CHECK:.func  (.param .align 32 .b8 func_retval0[32])
503; CHECK-LABEL: test_v9f16(
504; CHECK:      .param .align 32 .b8 test_v9f16_param_0[32]
505; CHECK-DAG:  ld.param.v4.b16  {[[E0:%h[0-9]+]], [[E1:%h[0-9]+]], [[E2:%h[0-9]+]], [[E3:%h[0-9]+]]}, [test_v9f16_param_0];
506; CHECK-DAG:  ld.param.v4.b16  {[[E4:%h[0-9]+]], [[E5:%h[0-9]+]], [[E6:%h[0-9]+]], [[E7:%h[0-9]+]]}, [test_v9f16_param_0+8];
507; CHECK-DAG:  ld.param.b16     [[E8:%h[0-9]+]], [test_v9f16_param_0+16];
508; CHECK:      .param .align 32 .b8 param0[32];
509; CHECK-DAG:  st.param.v4.b16 [param0+0],
510; CHECK-DAG:  st.param.v4.b16 [param0+8],
511; CHECK-DAG:  st.param.b16    [param0+16], [[E8]];
512; CHECK:      .param .align 32 .b8 retval0[32];
513; CHECK:      call.uni (retval0),
514; CHECK:      test_v9f16,
515; CHECK-DAG:  ld.param.v4.b16 {[[R0:%h[0-9]+]], [[R1:%h[0-9]+]], [[R2:%h[0-9]+]], [[R3:%h[0-9]+]]}, [retval0+0];
516; CHECK-DAG:  ld.param.v4.b16 {[[R4:%h[0-9]+]], [[R5:%h[0-9]+]], [[R6:%h[0-9]+]], [[R7:%h[0-9]+]]}, [retval0+8];
517; CHECK-DAG:  ld.param.b16    [[R8:%h[0-9]+]], [retval0+16];
518; CHECK-DAG:  st.param.v4.b16 [func_retval0+0], {[[R0]], [[R1]], [[R2]], [[R3]]};
519; CHECK-DAG:  st.param.v4.b16 [func_retval0+8], {[[R4]], [[R5]], [[R6]], [[R7]]};
520; CHECK-DAG:  st.param.b16    [func_retval0+16], [[R8]];
521; CHECK:      ret;
define <9 x half> @test_v9f16(<9 x half> %a) {
  ; Pass the <9 x half> argument to a recursive self-call and return its result.
  %res = tail call <9 x half> @test_v9f16(<9 x half> %a)
  ret <9 x half> %res
}
526
527; CHECK: .func  (.param .b32 func_retval0)
528; CHECK-LABEL: test_i19(
529; CHECK-NEXT: .param .b32 test_i19_param_0
530; CHECK-DAG:  ld.param.u16    {{%r[0-9]+}}, [test_i19_param_0];
531; CHECK-DAG:  ld.param.u8     {{%r[0-9]+}}, [test_i19_param_0+2];
532; CHECK:      .param .b32 param0;
533; CHECK:      st.param.b32    [param0+0], {{%r[0-9]+}};
534; CHECK:      .param .b32 retval0;
535; CHECK:      call.uni (retval0),
536; CHECK-NEXT: test_i19,
537; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0+0];
538; CHECK:      st.param.b32    [func_retval0+0], {{%r[0-9]+}};
539; CHECK-NEXT: ret;
define i19 @test_i19(i19 %a) {
  ; Pass the i19 argument to a recursive self-call and return the call's result.
  %res = tail call i19 @test_i19(i19 %a)
  ret i19 %res
}
544
545; CHECK: .func  (.param .b32 func_retval0)
546; CHECK-LABEL: test_i23(
547; CHECK-NEXT: .param .b32 test_i23_param_0
548; CHECK-DAG:  ld.param.u16    {{%r[0-9]+}}, [test_i23_param_0];
549; CHECK-DAG:  ld.param.u8     {{%r[0-9]+}}, [test_i23_param_0+2];
550; CHECK:      .param .b32 param0;
551; CHECK:      st.param.b32    [param0+0], {{%r[0-9]+}};
552; CHECK:      .param .b32 retval0;
553; CHECK:      call.uni (retval0),
554; CHECK-NEXT: test_i23,
555; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0+0];
556; CHECK:      st.param.b32    [func_retval0+0], {{%r[0-9]+}};
557; CHECK-NEXT: ret;
define i23 @test_i23(i23 %a) {
  ; Pass the i23 argument to a recursive self-call and return the call's result.
  %res = tail call i23 @test_i23(i23 %a)
  ret i23 %res
}
562
563; CHECK: .func  (.param .b32 func_retval0)
564; CHECK-LABEL: test_i24(
565; CHECK-NEXT: .param .b32 test_i24_param_0
566; CHECK-DAG:  ld.param.u8     {{%r[0-9]+}}, [test_i24_param_0+2];
567; CHECK-DAG:  ld.param.u16    {{%r[0-9]+}}, [test_i24_param_0];
568; CHECK:      .param .b32 param0;
569; CHECK:      st.param.b32    [param0+0], {{%r[0-9]+}};
570; CHECK:      .param .b32 retval0;
571; CHECK:      call.uni (retval0),
572; CHECK-NEXT: test_i24,
573; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0+0];
574; CHECK:      st.param.b32    [func_retval0+0], {{%r[0-9]+}};
575; CHECK-NEXT: ret;
define i24 @test_i24(i24 %a) {
  ; Pass the i24 argument to a recursive self-call and return the call's result.
  %res = tail call i24 @test_i24(i24 %a)
  ret i24 %res
}
580
581; CHECK: .func  (.param .b32 func_retval0)
582; CHECK-LABEL: test_i29(
583; CHECK-NEXT: .param .b32 test_i29_param_0
584; CHECK:      ld.param.u32    {{%r[0-9]+}}, [test_i29_param_0];
585; CHECK:      .param .b32 param0;
586; CHECK:      st.param.b32    [param0+0], {{%r[0-9]+}};
587; CHECK:      .param .b32 retval0;
588; CHECK:      call.uni (retval0),
589; CHECK-NEXT: test_i29,
590; CHECK:      ld.param.b32    {{%r[0-9]+}}, [retval0+0];
591; CHECK:      st.param.b32    [func_retval0+0], {{%r[0-9]+}};
592; CHECK-NEXT: ret;
define i29 @test_i29(i29 %a) {
  ; Pass the i29 argument to a recursive self-call and return the call's result.
  %res = tail call i29 @test_i29(i29 %a)
  ret i29 %res
}
597
598; CHECK: .func  (.param .b32 func_retval0)
599; CHECK-LABEL: test_i32(
600; CHECK-NEXT: .param .b32 test_i32_param_0
601; CHECK:      ld.param.u32    [[E:%r[0-9]+]], [test_i32_param_0];
602; CHECK:      .param .b32 param0;
603; CHECK:      st.param.b32    [param0+0], [[E]];
604; CHECK:      .param .b32 retval0;
605; CHECK:      call.uni (retval0),
606; CHECK-NEXT: test_i32,
607; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0+0];
608; CHECK:      st.param.b32    [func_retval0+0], [[R]];
609; CHECK-NEXT: ret;
define i32 @test_i32(i32 %a) {
  ; Pass the i32 argument to a recursive self-call and return the call's result.
  %res = tail call i32 @test_i32(i32 %a)
  ret i32 %res
}
614
615; CHECK: .func  (.param .align 16 .b8 func_retval0[16])
616; CHECK-LABEL: test_v3i32(
617; CHECK-NEXT: .param .align 16 .b8 test_v3i32_param_0[16]
618; CHECK-DAG:  ld.param.u32     [[E2:%r[0-9]+]], [test_v3i32_param_0+8];
619; CHECK-DAG:  ld.param.v2.u32  {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_v3i32_param_0];
620; CHECK:      .param .align 16 .b8 param0[16];
621; CHECK:      st.param.v2.b32  [param0+0], {[[E0]], [[E1]]};
622; CHECK:      st.param.b32     [param0+8], [[E2]];
623; CHECK:      .param .align 16 .b8 retval0[16];
624; CHECK:      call.uni (retval0),
625; CHECK-NEXT: test_v3i32,
626; CHECK:      ld.param.v2.b32  {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
627; CHECK:      ld.param.b32     [[RE2:%r[0-9]+]], [retval0+8];
628; CHECK-DAG:  st.param.v2.b32  [func_retval0+0], {[[RE0]], [[RE1]]};
629; CHECK-DAG:  st.param.b32     [func_retval0+8], [[RE2]];
630; CHECK-NEXT: ret;
define <3 x i32> @test_v3i32(<3 x i32> %a) {
  ; Pass the <3 x i32> argument to a recursive self-call and return its result.
  %res = tail call <3 x i32> @test_v3i32(<3 x i32> %a)
  ret <3 x i32> %res
}
635
636; CHECK: .func  (.param .align 16 .b8 func_retval0[16])
637; CHECK-LABEL: test_v4i32(
638; CHECK-NEXT: .param .align 16 .b8 test_v4i32_param_0[16]
639; CHECK:      ld.param.v4.u32  {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v4i32_param_0]
640; CHECK:      .param .align 16 .b8 param0[16];
641; CHECK:      st.param.v4.b32  [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
642; CHECK:      .param .align 16 .b8 retval0[16];
643; CHECK:      call.uni (retval0),
644; CHECK-NEXT: test_v4i32,
645; CHECK:      ld.param.v4.b32  {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
646; CHECK:      st.param.v4.b32  [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
647; CHECK-NEXT: ret;
define <4 x i32> @test_v4i32(<4 x i32> %a) {
  ; Pass the <4 x i32> argument to a recursive self-call and return its result.
  %res = tail call <4 x i32> @test_v4i32(<4 x i32> %a)
  ret <4 x i32> %res
}
652
653; CHECK: .func  (.param .align 32 .b8 func_retval0[32])
654; CHECK-LABEL: test_v5i32(
655; CHECK-NEXT: .param .align 32 .b8 test_v5i32_param_0[32]
656; CHECK-DAG:  ld.param.u32     [[E4:%r[0-9]+]], [test_v5i32_param_0+16];
657; CHECK-DAG:  ld.param.v4.u32  {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]], [[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_v5i32_param_0]
658; CHECK:      .param .align 32 .b8 param0[32];
659; CHECK-DAG:  st.param.v4.b32  [param0+0], {[[E0]], [[E1]], [[E2]], [[E3]]};
660; CHECK-DAG:  st.param.b32     [param0+16], [[E4]];
661; CHECK:      .param .align 32 .b8 retval0[32];
662; CHECK:      call.uni (retval0),
663; CHECK-NEXT: test_v5i32,
664; CHECK-DAG:  ld.param.v4.b32  {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]], [[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+0];
665; CHECK-DAG:  ld.param.b32     [[RE4:%r[0-9]+]], [retval0+16];
666; CHECK-DAG:  st.param.v4.b32  [func_retval0+0], {[[RE0]], [[RE1]], [[RE2]], [[RE3]]}
667; CHECK-DAG:  st.param.b32     [func_retval0+16], [[RE4]];
668; CHECK-NEXT: ret;
define <5 x i32> @test_v5i32(<5 x i32> %a) {
  ; Pass the <5 x i32> argument to a recursive self-call and return its result.
  %res = tail call <5 x i32> @test_v5i32(<5 x i32> %a)
  ret <5 x i32> %res
}
673
674; CHECK: .func  (.param .b32 func_retval0)
675; CHECK-LABEL: test_f32(
676; CHECK-NEXT: .param .b32 test_f32_param_0
677; CHECK:      ld.param.f32    [[E:%f[0-9]+]], [test_f32_param_0];
678; CHECK:      .param .b32 param0;
679; CHECK:      st.param.f32    [param0+0], [[E]];
680; CHECK:      .param .b32 retval0;
681; CHECK:      call.uni (retval0),
682; CHECK-NEXT: test_f32,
683; CHECK:      ld.param.f32    [[R:%f[0-9]+]], [retval0+0];
684; CHECK:      st.param.f32    [func_retval0+0], [[R]];
685; CHECK-NEXT: ret;
define float @test_f32(float %a) {
  ; Pass the float argument to a recursive self-call and return the call's result.
  %res = tail call float @test_f32(float %a)
  ret float %res
}
690
691; CHECK: .func  (.param .b64 func_retval0)
692; CHECK-LABEL: test_i40(
693; CHECK-NEXT: .param .b64 test_i40_param_0
694; CHECK-DAG:  ld.param.u8    {{%rd[0-9]+}}, [test_i40_param_0+4];
695; CHECK-DAG:  ld.param.u32   {{%rd[0-9]+}}, [test_i40_param_0];
696; CHECK:      .param .b64 param0;
697; CHECK:      st.param.b64    [param0+0], {{%rd[0-9]+}};
698; CHECK:      .param .b64 retval0;
699; CHECK:      call.uni (retval0),
700; CHECK-NEXT: test_i40,
701; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0+0];
702; CHECK:      st.param.b64    [func_retval0+0], {{%rd[0-9]+}};
703; CHECK-NEXT: ret;
; Forwards the i40 argument to a self-recursive tail call and returns the
; callee's value as-is.
704define i40 @test_i40(i40 %a) {
705  %ret = tail call i40 @test_i40(i40 %a)
706  ret i40 %ret
707}
708
709; CHECK: .func  (.param .b64 func_retval0)
710; CHECK-LABEL: test_i47(
711; CHECK-NEXT: .param .b64 test_i47_param_0
712; CHECK-DAG:  ld.param.u16   {{%rd[0-9]+}}, [test_i47_param_0+4];
713; CHECK-DAG:  ld.param.u32   {{%rd[0-9]+}}, [test_i47_param_0];
714; CHECK:      .param .b64 param0;
715; CHECK:      st.param.b64    [param0+0], {{%rd[0-9]+}};
716; CHECK:      .param .b64 retval0;
717; CHECK:      call.uni (retval0),
718; CHECK-NEXT: test_i47,
719; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0+0];
720; CHECK:      st.param.b64    [func_retval0+0], {{%rd[0-9]+}};
721; CHECK-NEXT: ret;
; Forwards the i47 argument to a self-recursive tail call and returns the
; callee's value as-is.
722define i47 @test_i47(i47 %a) {
723  %ret = tail call i47 @test_i47(i47 %a)
724  ret i47 %ret
725}
726
727; CHECK: .func  (.param .b64 func_retval0)
728; CHECK-LABEL: test_i48(
729; CHECK-NEXT: .param .b64 test_i48_param_0
730; CHECK-DAG:  ld.param.u16   {{%rd[0-9]+}}, [test_i48_param_0+4];
731; CHECK-DAG:  ld.param.u32   {{%rd[0-9]+}}, [test_i48_param_0];
732; CHECK:      .param .b64 param0;
733; CHECK:      st.param.b64    [param0+0], {{%rd[0-9]+}};
734; CHECK:      .param .b64 retval0;
735; CHECK:      call.uni (retval0),
736; CHECK-NEXT: test_i48,
737; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0+0];
738; CHECK:      st.param.b64    [func_retval0+0], {{%rd[0-9]+}};
739; CHECK-NEXT: ret;
; Forwards the i48 argument to a self-recursive tail call and returns the
; callee's value as-is.
740define i48 @test_i48(i48 %a) {
741  %ret = tail call i48 @test_i48(i48 %a)
742  ret i48 %ret
743}
744
745; CHECK: .func  (.param .b64 func_retval0)
746; CHECK-LABEL: test_i51(
747; CHECK-NEXT: .param .b64 test_i51_param_0
748; CHECK-DAG:  ld.param.u8    {{%rd[0-9]+}}, [test_i51_param_0+6];
749; CHECK-DAG:  ld.param.u16   {{%rd[0-9]+}}, [test_i51_param_0+4];
750; CHECK-DAG:  ld.param.u32   {{%rd[0-9]+}}, [test_i51_param_0];
751; CHECK:      .param .b64 param0;
752; CHECK:      st.param.b64    [param0+0], {{%rd[0-9]+}};
753; CHECK:      .param .b64 retval0;
754; CHECK:      call.uni (retval0),
755; CHECK-NEXT: test_i51,
756; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0+0];
757; CHECK:      st.param.b64    [func_retval0+0], {{%rd[0-9]+}};
758; CHECK-NEXT: ret;
; Forwards the i51 argument to a self-recursive tail call and returns the
; callee's value as-is.
759define i51 @test_i51(i51 %a) {
760  %ret = tail call i51 @test_i51(i51 %a)
761  ret i51 %ret
762}
763
764; CHECK: .func  (.param .b64 func_retval0)
765; CHECK-LABEL: test_i56(
766; CHECK-NEXT: .param .b64 test_i56_param_0
767; CHECK-DAG:  ld.param.u8    {{%rd[0-9]+}}, [test_i56_param_0+6];
768; CHECK-DAG:  ld.param.u16   {{%rd[0-9]+}}, [test_i56_param_0+4];
769; CHECK-DAG:  ld.param.u32   {{%rd[0-9]+}}, [test_i56_param_0];
770; CHECK:      .param .b64 param0;
771; CHECK:      st.param.b64    [param0+0], {{%rd[0-9]+}};
772; CHECK:      .param .b64 retval0;
773; CHECK:      call.uni (retval0),
774; CHECK-NEXT: test_i56,
775; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0+0];
776; CHECK:      st.param.b64    [func_retval0+0], {{%rd[0-9]+}};
777; CHECK-NEXT: ret;
; Forwards the i56 argument to a self-recursive tail call and returns the
; callee's value as-is.
778define i56 @test_i56(i56 %a) {
779  %ret = tail call i56 @test_i56(i56 %a)
780  ret i56 %ret
781}
782
783; CHECK: .func  (.param .b64 func_retval0)
784; CHECK-LABEL: test_i57(
785; CHECK-NEXT: .param .b64 test_i57_param_0
786; CHECK:      ld.param.u64    {{%rd[0-9]+}}, [test_i57_param_0];
787; CHECK:      .param .b64 param0;
788; CHECK:      st.param.b64    [param0+0], {{%rd[0-9]+}};
789; CHECK:      .param .b64 retval0;
790; CHECK:      call.uni (retval0),
791; CHECK-NEXT: test_i57,
792; CHECK:      ld.param.b64    {{%rd[0-9]+}}, [retval0+0];
793; CHECK:      st.param.b64    [func_retval0+0], {{%rd[0-9]+}};
794; CHECK-NEXT: ret;
; Forwards the i57 argument to a self-recursive tail call and returns the
; callee's value as-is.
795define i57 @test_i57(i57 %a) {
796  %ret = tail call i57 @test_i57(i57 %a)
797  ret i57 %ret
798}
799
800; CHECK: .func  (.param .b64 func_retval0)
801; CHECK-LABEL: test_i64(
802; CHECK-NEXT: .param .b64 test_i64_param_0
803; CHECK:      ld.param.u64    [[E:%rd[0-9]+]], [test_i64_param_0];
804; CHECK:      .param .b64 param0;
805; CHECK:      st.param.b64    [param0+0], [[E]];
806; CHECK:      .param .b64 retval0;
807; CHECK:      call.uni (retval0),
808; CHECK-NEXT: test_i64,
809; CHECK:      ld.param.b64    [[R:%rd[0-9]+]], [retval0+0];
810; CHECK:      st.param.b64    [func_retval0+0], [[R]];
811; CHECK-NEXT: ret;
; Forwards the i64 argument to a self-recursive tail call and returns the
; callee's value as-is.
812define i64 @test_i64(i64 %a) {
813  %ret = tail call i64 @test_i64(i64 %a)
814  ret i64 %ret
815}
816
; <3 x i64> is passed as a 32-byte, 32-byte-aligned array: elements 0-1 move
; as one v2 access at offset 0 and element 2 as a scalar access at offset 16.
; Each return-value store is verified exactly once.
817; CHECK: .func  (.param .align 32 .b8 func_retval0[32])
818; CHECK-LABEL: test_v3i64(
819; CHECK-NEXT: .param .align 32 .b8 test_v3i64_param_0[32]
820; CHECK-DAG:  ld.param.u64     [[E2:%rd[0-9]+]], [test_v3i64_param_0+16];
821; CHECK-DAG:  ld.param.v2.u64  {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v3i64_param_0];
822; CHECK:      .param .align 32 .b8 param0[32];
823; CHECK:      st.param.v2.b64  [param0+0], {[[E0]], [[E1]]};
824; CHECK:      st.param.b64     [param0+16], [[E2]];
825; CHECK:      .param .align 32 .b8 retval0[32];
826; CHECK:      call.uni (retval0),
827; CHECK-NEXT: test_v3i64,
828; CHECK:      ld.param.v2.b64  {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
829; CHECK:      ld.param.b64     [[RE2:%rd[0-9]+]], [retval0+16];
830; CHECK-DAG:  st.param.v2.b64  [func_retval0+0], {[[RE0]], [[RE1]]};
831; CHECK-DAG:  st.param.b64     [func_retval0+16], [[RE2]];
834; CHECK-NEXT: ret;
835define <3 x i64> @test_v3i64(<3 x i64> %a) {
836       %r = tail call <3 x i64> @test_v3i64(<3 x i64> %a);
837       ret <3 x i64> %r;
838}
839
840; For i64, PTX limits vector loads to 2 elements.
841; CHECK: .func  (.param .align 32 .b8 func_retval0[32])
842; CHECK-LABEL: test_v4i64(
843; CHECK-NEXT: .param .align 32 .b8 test_v4i64_param_0[32]
844; CHECK-DAG:  ld.param.v2.u64  {[[E2:%rd[0-9]+]], [[E3:%rd[0-9]+]]}, [test_v4i64_param_0+16];
845; CHECK-DAG:  ld.param.v2.u64  {[[E0:%rd[0-9]+]], [[E1:%rd[0-9]+]]}, [test_v4i64_param_0];
846; CHECK:      .param .align 32 .b8 param0[32];
847; CHECK:      st.param.v2.b64  [param0+0], {[[E0]], [[E1]]};
848; CHECK:      st.param.v2.b64  [param0+16], {[[E2]], [[E3]]};
849; CHECK:      .param .align 32 .b8 retval0[32];
850; CHECK:      call.uni (retval0),
851; CHECK-NEXT: test_v4i64,
852; CHECK:      ld.param.v2.b64  {[[RE0:%rd[0-9]+]], [[RE1:%rd[0-9]+]]}, [retval0+0];
853; CHECK:      ld.param.v2.b64  {[[RE2:%rd[0-9]+]], [[RE3:%rd[0-9]+]]}, [retval0+16];
854; CHECK-DAG:  st.param.v2.b64  [func_retval0+16], {[[RE2]], [[RE3]]};
855; CHECK-DAG:  st.param.v2.b64  [func_retval0+0], {[[RE0]], [[RE1]]};
856; CHECK-NEXT: ret;
; Forwards the <4 x i64> argument to a self-recursive tail call and returns
; the callee's value as-is.
857define <4 x i64> @test_v4i64(<4 x i64> %a) {
858  %ret = tail call <4 x i64> @test_v4i64(<4 x i64> %a)
859  ret <4 x i64> %ret
860}
861
862; Aggregates, on the other hand, do not get extended.
863
; A single-field struct wrapping i1 is expected to be passed as a 1-byte,
; 1-byte-aligned array and moved with 8-bit loads/stores. The parameter
; declaration is matched including its .param keyword, like the sibling tests.
864; CHECK: .func  (.param .align 1 .b8 func_retval0[1])
865; CHECK-LABEL: test_s_i1(
866; CHECK-NEXT: .param .align 1 .b8 test_s_i1_param_0[1]
867; CHECK:      ld.param.u8 [[A:%rs[0-9]+]], [test_s_i1_param_0];
868; CHECK:      .param .align 1 .b8 param0[1];
869; CHECK:      st.param.b8    [param0+0], [[A]]
870; CHECK:      .param .align 1 .b8 retval0[1];
871; CHECK:      call.uni
872; CHECK-NEXT: test_s_i1,
873; CHECK:      ld.param.b8    [[R:%rs[0-9]+]], [retval0+0];
874; CHECK:      st.param.b8    [func_retval0+0], [[R]];
875; CHECK-NEXT: ret;
876define %s_i1 @test_s_i1(%s_i1 %a) {
877       %r = tail call %s_i1 @test_s_i1(%s_i1 %a);
878       ret %s_i1 %r;
879}
880
881; CHECK: .func  (.param .align 1 .b8 func_retval0[1])
882; CHECK-LABEL: test_s_i8(
883; CHECK-NEXT: .param .align 1 .b8 test_s_i8_param_0[1]
884; CHECK:      ld.param.u8 [[A:%rs[0-9]+]], [test_s_i8_param_0];
885; CHECK:      .param .align 1 .b8 param0[1];
886; CHECK:      st.param.b8    [param0+0], [[A]]
887; CHECK:      .param .align 1 .b8 retval0[1];
888; CHECK:      call.uni
889; CHECK-NEXT: test_s_i8,
890; CHECK:      ld.param.b8    [[R:%rs[0-9]+]], [retval0+0];
891; CHECK:      st.param.b8    [func_retval0+0], [[R]];
892; CHECK-NEXT: ret;
; Forwards the %s_i8 aggregate to a self-recursive tail call and returns the
; callee's value as-is.
893define %s_i8 @test_s_i8(%s_i8 %a) {
894  %ret = tail call %s_i8 @test_s_i8(%s_i8 %a)
895  ret %s_i8 %ret
896}
897
898; CHECK: .func  (.param .align 2 .b8 func_retval0[2])
899; CHECK-LABEL: test_s_i16(
900; CHECK-NEXT: .param .align 2 .b8 test_s_i16_param_0[2]
901; CHECK:      ld.param.u16 [[A:%rs[0-9]+]], [test_s_i16_param_0];
902; CHECK:      .param .align 2 .b8 param0[2];
903; CHECK:      st.param.b16    [param0+0], [[A]]
904; CHECK:      .param .align 2 .b8 retval0[2];
905; CHECK:      call.uni
906; CHECK-NEXT: test_s_i16,
907; CHECK:      ld.param.b16    [[R:%rs[0-9]+]], [retval0+0];
908; CHECK:      st.param.b16    [func_retval0+0], [[R]];
909; CHECK-NEXT: ret;
; Forwards the %s_i16 aggregate to a self-recursive tail call and returns the
; callee's value as-is.
910define %s_i16 @test_s_i16(%s_i16 %a) {
911  %ret = tail call %s_i16 @test_s_i16(%s_i16 %a)
912  ret %s_i16 %ret
913}
914
915; CHECK: .func  (.param .align 2 .b8 func_retval0[2])
916; CHECK-LABEL: test_s_f16(
917; CHECK-NEXT: .param .align 2 .b8 test_s_f16_param_0[2]
918; CHECK:      ld.param.b16 [[A:%h[0-9]+]], [test_s_f16_param_0];
919; CHECK:      .param .align 2 .b8 param0[2];
920; CHECK:      st.param.b16    [param0+0], [[A]]
921; CHECK:      .param .align 2 .b8 retval0[2];
922; CHECK:      call.uni
923; CHECK-NEXT: test_s_f16,
924; CHECK:      ld.param.b16    [[R:%h[0-9]+]], [retval0+0];
925; CHECK:      st.param.b16    [func_retval0+0], [[R]];
926; CHECK-NEXT: ret;
; Forwards the %s_f16 aggregate to a self-recursive tail call and returns the
; callee's value as-is.
927define %s_f16 @test_s_f16(%s_f16 %a) {
928  %ret = tail call %s_f16 @test_s_f16(%s_f16 %a)
929  ret %s_f16 %ret
930}
931
932; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
933; CHECK-LABEL: test_s_i32(
934; CHECK-NEXT: .param .align 4 .b8 test_s_i32_param_0[4]
935; CHECK:      ld.param.u32    [[E:%r[0-9]+]], [test_s_i32_param_0];
936; CHECK:      .param .align 4 .b8 param0[4]
937; CHECK:      st.param.b32    [param0+0], [[E]];
938; CHECK:      .param .align 4 .b8 retval0[4];
939; CHECK:      call.uni (retval0),
940; CHECK-NEXT: test_s_i32,
941; CHECK:      ld.param.b32    [[R:%r[0-9]+]], [retval0+0];
942; CHECK:      st.param.b32    [func_retval0+0], [[R]];
943; CHECK-NEXT: ret;
; Forwards the %s_i32 aggregate to a self-recursive tail call and returns the
; callee's value as-is.
944define %s_i32 @test_s_i32(%s_i32 %a) {
945  %ret = tail call %s_i32 @test_s_i32(%s_i32 %a)
946  ret %s_i32 %ret
947}
948
949; CHECK: .func  (.param .align 4 .b8 func_retval0[4])
950; CHECK-LABEL: test_s_f32(
951; CHECK-NEXT: .param .align 4 .b8 test_s_f32_param_0[4]
952; CHECK:      ld.param.f32    [[E:%f[0-9]+]], [test_s_f32_param_0];
953; CHECK:      .param .align 4 .b8 param0[4]
954; CHECK:      st.param.f32    [param0+0], [[E]];
955; CHECK:      .param .align 4 .b8 retval0[4];
956; CHECK:      call.uni (retval0),
957; CHECK-NEXT: test_s_f32,
958; CHECK:      ld.param.f32    [[R:%f[0-9]+]], [retval0+0];
959; CHECK:      st.param.f32    [func_retval0+0], [[R]];
960; CHECK-NEXT: ret;
; Forwards the %s_f32 aggregate to a self-recursive tail call and returns the
; callee's value as-is.
961define %s_f32 @test_s_f32(%s_f32 %a) {
962  %ret = tail call %s_f32 @test_s_f32(%s_f32 %a)
963  ret %s_f32 %ret
964}
965
966; CHECK: .func  (.param .align 8 .b8 func_retval0[8])
967; CHECK-LABEL: test_s_i64(
968; CHECK-NEXT: .param .align 8 .b8 test_s_i64_param_0[8]
969; CHECK:      ld.param.u64    [[E:%rd[0-9]+]], [test_s_i64_param_0];
970; CHECK:      .param .align 8 .b8 param0[8];
971; CHECK:      st.param.b64    [param0+0], [[E]];
972; CHECK:      .param .align 8 .b8 retval0[8];
973; CHECK:      call.uni (retval0),
974; CHECK-NEXT: test_s_i64,
975; CHECK:      ld.param.b64    [[R:%rd[0-9]+]], [retval0+0];
976; CHECK:      st.param.b64    [func_retval0+0], [[R]];
977; CHECK-NEXT: ret;
; Forwards the %s_i64 aggregate to a self-recursive tail call and returns the
; callee's value as-is.
978define %s_i64 @test_s_i64(%s_i64 %a) {
979  %ret = tail call %s_i64 @test_s_i64(%s_i64 %a)
980  ret %s_i64 %ret
981}
982
983; Fields that have different types but identical sizes are not vectorized.
984; CHECK: .func  (.param .align 8 .b8 func_retval0[24])
985; CHECK-LABEL: test_s_i32f32(
986; CHECK:        .param .align 8 .b8 test_s_i32f32_param_0[24]
987; CHECK-DAG:    ld.param.u64    [[E4:%rd[0-9]+]], [test_s_i32f32_param_0+16];
988; CHECK-DAG:    ld.param.f32    [[E3:%f[0-9]+]], [test_s_i32f32_param_0+12];
989; CHECK-DAG:    ld.param.u32    [[E2:%r[0-9]+]], [test_s_i32f32_param_0+8];
990; CHECK-DAG:    ld.param.f32    [[E1:%f[0-9]+]], [test_s_i32f32_param_0+4];
991; CHECK-DAG:    ld.param.u32    [[E0:%r[0-9]+]], [test_s_i32f32_param_0];
992; CHECK:        .param .align 8 .b8 param0[24];
993; CHECK-DAG:    st.param.b32    [param0+0], [[E0]];
994; CHECK-DAG:    st.param.f32    [param0+4], [[E1]];
995; CHECK-DAG:    st.param.b32    [param0+8], [[E2]];
996; CHECK-DAG:    st.param.f32    [param0+12], [[E3]];
997; CHECK-DAG:    st.param.b64    [param0+16], [[E4]];
998; CHECK:        .param .align 8 .b8 retval0[24];
999; CHECK:        call.uni (retval0),
1000; CHECK-NEXT:   test_s_i32f32,
1001; CHECK-DAG:    ld.param.b32    [[RE0:%r[0-9]+]], [retval0+0];
1002; CHECK-DAG:    ld.param.f32    [[RE1:%f[0-9]+]], [retval0+4];
1003; CHECK-DAG:    ld.param.b32    [[RE2:%r[0-9]+]], [retval0+8];
1004; CHECK-DAG:    ld.param.f32    [[RE3:%f[0-9]+]], [retval0+12];
1005; CHECK-DAG:    ld.param.b64    [[RE4:%rd[0-9]+]], [retval0+16];
1006; CHECK-DAG:    st.param.b32    [func_retval0+0], [[RE0]];
1007; CHECK-DAG:    st.param.f32    [func_retval0+4], [[RE1]];
1008; CHECK-DAG:    st.param.b32    [func_retval0+8], [[RE2]];
1009; CHECK-DAG:    st.param.f32    [func_retval0+12], [[RE3]];
1010; CHECK-DAG:    st.param.b64    [func_retval0+16], [[RE4]];
1011; CHECK:        ret;
; Forwards the %s_i32f32 aggregate to a self-recursive tail call and returns
; the callee's value as-is.
1012define %s_i32f32 @test_s_i32f32(%s_i32f32 %a) {
1013  %ret = tail call %s_i32f32 @test_s_i32f32(%s_i32f32 %a)
1014  ret %s_i32f32 %ret
1015}
1016
1017; We do vectorize consecutive fields with matching types.
; The four i32 fields are expected to move as two v2 accesses (offsets 0 and
; 8); the trailing i64 moves on its own at offset 16. The i64 load is captured
; as E4 so that the matching store below refers to this function's register
; rather than to a capture left over from an earlier test.
1018; CHECK:.visible .func  (.param .align 8 .b8 func_retval0[24])
1019; CHECK-LABEL: test_s_i32x4(
1020; CHECK:        .param .align 8 .b8 test_s_i32x4_param_0[24]
1021; CHECK-DAG:    ld.param.u64    [[E4:%rd[0-9]+]], [test_s_i32x4_param_0+16];
1022; CHECK-DAG:    ld.param.v2.u32 {[[E2:%r[0-9]+]], [[E3:%r[0-9]+]]}, [test_s_i32x4_param_0+8];
1023; CHECK-DAG:    ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i32x4_param_0];
1024; CHECK:        .param .align 8 .b8 param0[24];
1025; CHECK:        st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
1026; CHECK:        st.param.v2.b32 [param0+8], {[[E2]], [[E3]]};
1027; CHECK:        st.param.b64    [param0+16], [[E4]];
1028; CHECK:        .param .align 8 .b8 retval0[24];
1029; CHECK:        call.uni (retval0),
1030; CHECK-NEXT:   test_s_i32x4,
1031; CHECK:        ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
1032; CHECK:        ld.param.v2.b32 {[[RE2:%r[0-9]+]], [[RE3:%r[0-9]+]]}, [retval0+8];
1033; CHECK:        ld.param.b64    [[RE4:%rd[0-9]+]], [retval0+16];
1034; CHECK-DAG:    st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
1035; CHECK-DAG:    st.param.v2.b32 [func_retval0+8], {[[RE2]], [[RE3]]};
1036; CHECK-DAG:    st.param.b64    [func_retval0+16], [[RE4]];
1037; CHECK:        ret;
1038
1039define %s_i32x4 @test_s_i32x4(%s_i32x4 %a) {
1040       %r = tail call %s_i32x4 @test_s_i32x4(%s_i32x4 %a);
1041       ret %s_i32x4 %r;
1042}
1043
1044; CHECK:.visible .func  (.param .align 8 .b8 func_retval0[32])
1045; CHECK-LABEL: test_s_i1i32x4(
1046; CHECK:        .param .align 8 .b8 test_s_i1i32x4_param_0[32]
1047; CHECK:        ld.param.u64    [[E5:%rd[0-9]+]], [test_s_i1i32x4_param_0+24];
1048; CHECK:        ld.param.u32    [[E4:%r[0-9]+]], [test_s_i1i32x4_param_0+16];
1049; CHECK:        ld.param.u32    [[E3:%r[0-9]+]], [test_s_i1i32x4_param_0+12];
1050; CHECK:        ld.param.u8     [[E2:%rs[0-9]+]], [test_s_i1i32x4_param_0+8];
1051; CHECK:        ld.param.v2.u32         {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_i1i32x4_param_0];
1052; CHECK:        .param .align 8 .b8 param0[32];
1053; CHECK:        st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
1054; CHECK:        st.param.b8     [param0+8], [[E2]];
1055; CHECK:        st.param.b32    [param0+12], [[E3]];
1056; CHECK:        st.param.b32    [param0+16], [[E4]];
1057; CHECK:        st.param.b64    [param0+24], [[E5]];
1058; CHECK:        .param .align 8 .b8 retval0[32];
1059; CHECK:        call.uni (retval0),
1060; CHECK:        test_s_i1i32x4,
1061; CHECK:        (
1062; CHECK:        param0
1063; CHECK:        );
1064; CHECK:        ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
1065; CHECK:        ld.param.b8     [[RE2:%rs[0-9]+]], [retval0+8];
1066; CHECK:        ld.param.b32    [[RE3:%r[0-9]+]], [retval0+12];
1067; CHECK:        ld.param.b32    [[RE4:%r[0-9]+]], [retval0+16];
1068; CHECK:        ld.param.b64    [[RE5:%rd[0-9]+]], [retval0+24];
1069; CHECK:        st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
1070; CHECK:        st.param.b8     [func_retval0+8], [[RE2]];
1071; CHECK:        st.param.b32    [func_retval0+12], [[RE3]];
1072; CHECK:        st.param.b32    [func_retval0+16], [[RE4]];
1073; CHECK:        st.param.b64    [func_retval0+24], [[RE5]];
1074; CHECK:        ret;
1075
; Forwards the %s_i8i32x4 aggregate (its third field is an i8, despite the
; "i1" in the function name) to a self-recursive tail call and returns the
; callee's value as-is.
1076define %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a) {
1077  %ret = tail call %s_i8i32x4 @test_s_i1i32x4(%s_i8i32x4 %a)
1078  ret %s_i8i32x4 %ret
1079}
1080
1081; -- All loads/stores from parameters aligned by one must be done one
1082; -- byte at a time.
; -- The packed aggregate is 25 bytes with alignment 1. Return-value loads are
; -- matched with register patterns instead of fixed virtual register numbers
; -- so that unrelated register renumbering does not break this test.
1083; CHECK:.visible .func  (.param .align 1 .b8 func_retval0[25])
1084; CHECK-LABEL: test_s_i1i32x4p(
1085; CHECK-DAG:        .param .align 1 .b8 test_s_i1i32x4p_param_0[25]
1086; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+24];
1087; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+23];
1088; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+22];
1089; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+21];
1090; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+20];
1091; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+19];
1092; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+18];
1093; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+17];
1094; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+16];
1095; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+15];
1096; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+14];
1097; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+13];
1098; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+12];
1099; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+11];
1100; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+10];
1101; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+9];
1102; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+8];
1103; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+7];
1104; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+6];
1105; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+5];
1106; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+4];
1107; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+3];
1108; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+2];
1109; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0+1];
1110; CHECK-DAG:        ld.param.u8     %r{{.*}}, [test_s_i1i32x4p_param_0];
1111; --- TODO
1112; --- Unaligned parameter store/return value load is broken in both nvcc
1113; --- and llvm and needs to be fixed.
1114; CHECK:        .param .align 1 .b8 param0[25];
1115; CHECK-DAG:        st.param.b32    [param0+0],
1116; CHECK-DAG:        st.param.b32    [param0+4],
1117; CHECK-DAG:        st.param.b8     [param0+8],
1118; CHECK-DAG:        st.param.b32    [param0+9],
1119; CHECK-DAG:        st.param.b32    [param0+13],
1120; CHECK-DAG:        st.param.b64    [param0+17],
1121; CHECK:            .param .align 1 .b8 retval0[25];
1122; CHECK:            call.uni (retval0),
1123; CHECK-NEXT:       test_s_i1i32x4p,
1124; CHECK-DAG:        ld.param.b32    {{%r[0-9]+}}, [retval0+0];
1125; CHECK-DAG:        ld.param.b32    {{%r[0-9]+}}, [retval0+4];
1126; CHECK-DAG:        ld.param.b8     {{%rs[0-9]+}}, [retval0+8];
1127; CHECK-DAG:        ld.param.b32    {{%r[0-9]+}}, [retval0+9];
1128; CHECK-DAG:        ld.param.b32    {{%r[0-9]+}}, [retval0+13];
1129; CHECK-DAG:        ld.param.b64    {{%rd[0-9]+}}, [retval0+17];
1130; CHECK-DAG:        st.param.b32    [func_retval0+0],
1131; CHECK-DAG:        st.param.b32    [func_retval0+4],
1132; CHECK-DAG:        st.param.b8     [func_retval0+8],
1133; CHECK-DAG:        st.param.b32    [func_retval0+9],
1134; CHECK-DAG:        st.param.b32    [func_retval0+13],
1135; CHECK-DAG:        st.param.b64    [func_retval0+17],
; CHECK:            ret;
1136
1137define %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a) {
1138       %r = tail call %s_i8i32x4p @test_s_i1i32x4p(%s_i8i32x4p %a);
1139       ret %s_i8i32x4p %r;
1140}
1141
1142; Check that we can vectorize loads that span multiple aggregate fields.
1143; CHECK:.visible .func  (.param .align 16 .b8 func_retval0[80])
1144; CHECK-LABEL: test_s_crossfield(
1145; CHECK:        .param .align 16 .b8 test_s_crossfield_param_0[80]
1146; CHECK:        ld.param.u32    [[E15:%r[0-9]+]], [test_s_crossfield_param_0+64];
1147; CHECK:        ld.param.v4.u32 {[[E11:%r[0-9]+]], [[E12:%r[0-9]+]], [[E13:%r[0-9]+]], [[E14:%r[0-9]+]]}, [test_s_crossfield_param_0+48];
1148; CHECK:        ld.param.v4.u32 {[[E7:%r[0-9]+]], [[E8:%r[0-9]+]], [[E9:%r[0-9]+]], [[E10:%r[0-9]+]]}, [test_s_crossfield_param_0+32];
1149; CHECK:        ld.param.v4.u32 {[[E3:%r[0-9]+]], [[E4:%r[0-9]+]], [[E5:%r[0-9]+]], [[E6:%r[0-9]+]]}, [test_s_crossfield_param_0+16];
1150; CHECK:        ld.param.u32    [[E2:%r[0-9]+]], [test_s_crossfield_param_0+8];
1151; CHECK:        ld.param.v2.u32 {[[E0:%r[0-9]+]], [[E1:%r[0-9]+]]}, [test_s_crossfield_param_0];
1152; CHECK:        .param .align 16 .b8 param0[80];
1153; CHECK:        st.param.v2.b32 [param0+0], {[[E0]], [[E1]]};
1154; CHECK:        st.param.b32    [param0+8], [[E2]];
1155; CHECK:        st.param.v4.b32 [param0+16], {[[E3]], [[E4]], [[E5]], [[E6]]};
1156; CHECK:        st.param.v4.b32 [param0+32], {[[E7]], [[E8]], [[E9]], [[E10]]};
1157; CHECK:        st.param.v4.b32 [param0+48], {[[E11]], [[E12]], [[E13]], [[E14]]};
1158; CHECK:        st.param.b32    [param0+64], [[E15]];
1159; CHECK:        .param .align 16 .b8 retval0[80];
1160; CHECK:        call.uni (retval0),
1161; CHECK:        test_s_crossfield,
1162; CHECK:        ld.param.v2.b32 {[[RE0:%r[0-9]+]], [[RE1:%r[0-9]+]]}, [retval0+0];
1163; CHECK:        ld.param.b32    [[RE2:%r[0-9]+]], [retval0+8];
1164; CHECK:        ld.param.v4.b32 {[[RE3:%r[0-9]+]], [[RE4:%r[0-9]+]], [[RE5:%r[0-9]+]], [[RE6:%r[0-9]+]]}, [retval0+16];
1165; CHECK:        ld.param.v4.b32 {[[RE7:%r[0-9]+]], [[RE8:%r[0-9]+]], [[RE9:%r[0-9]+]], [[RE10:%r[0-9]+]]}, [retval0+32];
1166; CHECK:        ld.param.v4.b32 {[[RE11:%r[0-9]+]], [[RE12:%r[0-9]+]], [[RE13:%r[0-9]+]], [[RE14:%r[0-9]+]]}, [retval0+48];
1167; CHECK:        ld.param.b32    [[RE15:%r[0-9]+]], [retval0+64];
1168; CHECK:        st.param.v2.b32 [func_retval0+0], {[[RE0]], [[RE1]]};
1169; CHECK:        st.param.b32    [func_retval0+8], [[RE2]];
1170; CHECK:        st.param.v4.b32 [func_retval0+16], {[[RE3]], [[RE4]], [[RE5]], [[RE6]]};
1171; CHECK:        st.param.v4.b32 [func_retval0+32], {[[RE7]], [[RE8]], [[RE9]], [[RE10]]};
1172; CHECK:        st.param.v4.b32 [func_retval0+48], {[[RE11]], [[RE12]], [[RE13]], [[RE14]]};
1173; CHECK:        st.param.b32    [func_retval0+64], [[RE15]];
1174; CHECK:        ret;
1175
; Forwards the %s_crossfield aggregate to a self-recursive tail call and
; returns the callee's value as-is.
1176define %s_crossfield @test_s_crossfield(%s_crossfield %a) {
1177  %ret = tail call %s_crossfield @test_s_crossfield(%s_crossfield %a)
1178  ret %s_crossfield %ret
1179}
1180