1; RUN: llc < %s -mtriple=nvptx-unknown-unknown | FileCheck %s
2;
3; Check that parameters of a __global__ (kernel) function do not get increased
4; alignment, and no additional vectorization is performed on loads/stores with
5; that parameters.
6;
7; Test IR is a minimized version of IR generated with the following command
8; from the source code below:
9; $ clang++ -O3 --cuda-gpu-arch=sm_35 -S -emit-llvm src.cu
10;
11; ----------------------------------------------------------------------------
12; #include <stdint.h>
13;
14; struct St4x1 { uint32_t field[1]; };
15; struct St4x2 { uint32_t field[2]; };
16; struct St4x3 { uint32_t field[3]; };
17; struct St4x4 { uint32_t field[4]; };
18; struct St4x5 { uint32_t field[5]; };
19; struct St4x6 { uint32_t field[6]; };
20; struct St4x7 { uint32_t field[7]; };
21; struct St4x8 { uint32_t field[8]; };
22; struct St8x1 { uint64_t field[1]; };
23; struct St8x2 { uint64_t field[2]; };
24; struct St8x3 { uint64_t field[3]; };
25; struct St8x4 { uint64_t field[4]; };
26;
27; #define DECLARE_FUNCTION(StName)                                    \
28; static __global__  __attribute__((noinline))                        \
29; void foo_##StName(struct StName in, struct StName* ret) {           \
30;   const unsigned size = sizeof(ret->field) / sizeof(*ret->field);   \
31;   for (unsigned i = 0; i != size; ++i)                              \
32;     ret->field[i] = in.field[i];                                    \
33; }                                                                   \
34;
35; DECLARE_FUNCTION(St4x1)
36; DECLARE_FUNCTION(St4x2)
37; DECLARE_FUNCTION(St4x3)
38; DECLARE_FUNCTION(St4x4)
39; DECLARE_FUNCTION(St4x5)
40; DECLARE_FUNCTION(St4x6)
41; DECLARE_FUNCTION(St4x7)
42; DECLARE_FUNCTION(St4x8)
43; DECLARE_FUNCTION(St8x1)
44; DECLARE_FUNCTION(St8x2)
45; DECLARE_FUNCTION(St8x3)
46; DECLARE_FUNCTION(St8x4)
47; ----------------------------------------------------------------------------
48
49%struct.St4x1 = type { [1 x i32] }
50%struct.St4x2 = type { [2 x i32] }
51%struct.St4x3 = type { [3 x i32] }
52%struct.St4x4 = type { [4 x i32] }
53%struct.St4x5 = type { [5 x i32] }
54%struct.St4x6 = type { [6 x i32] }
55%struct.St4x7 = type { [7 x i32] }
56%struct.St4x8 = type { [8 x i32] }
57%struct.St8x1 = type { [1 x i64] }
58%struct.St8x2 = type { [2 x i64] }
59%struct.St8x3 = type { [3 x i64] }
60%struct.St8x4 = type { [4 x i64] }
61
62define dso_local void @foo_St4x1(%struct.St4x1* nocapture noundef readonly byval(%struct.St4x1) align 4 %in, %struct.St4x1* nocapture noundef writeonly %ret) {
63  ; CHECK-LABEL: .visible .func foo_St4x1(
64  ; CHECK:               .param .align 4 .b8 foo_St4x1_param_0[4],
65  ; CHECK:               .param .b32 foo_St4x1_param_1
66  ; CHECK:       )
67  ; CHECK:       ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x1_param_1];
68  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x1_param_0];
69  ; CHECK:       st.u32  [[[R1]]], [[R2]];
70  ; CHECK:       ret;
71  %arrayidx = getelementptr inbounds %struct.St4x1, %struct.St4x1* %in, i64 0, i32 0, i64 0
72  %1 = load i32, i32* %arrayidx, align 4
73  %arrayidx3 = getelementptr inbounds %struct.St4x1, %struct.St4x1* %ret, i64 0, i32 0, i64 0
74  store i32 %1, i32* %arrayidx3, align 4
75  ret void
76}
77
78define dso_local void @foo_St4x2(%struct.St4x2* nocapture noundef readonly byval(%struct.St4x2) align 4 %in, %struct.St4x2* nocapture noundef writeonly %ret) {
79  ; CHECK-LABEL: .visible .func foo_St4x2(
80  ; CHECK:               .param .align 4 .b8 foo_St4x2_param_0[8],
81  ; CHECK:               .param .b32 foo_St4x2_param_1
82  ; CHECK:       )
83  ; CHECK:       ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x2_param_1];
84  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x2_param_0];
85  ; CHECK:       st.u32  [[[R1]]], [[R2]];
86  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x2_param_0+4];
87  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
88  ; CHECK:       ret;
89  %arrayidx = getelementptr inbounds %struct.St4x2, %struct.St4x2* %in, i64 0, i32 0, i64 0
90  %1 = load i32, i32* %arrayidx, align 4
91  %arrayidx3 = getelementptr inbounds %struct.St4x2, %struct.St4x2* %ret, i64 0, i32 0, i64 0
92  store i32 %1, i32* %arrayidx3, align 4
93  %arrayidx.1 = getelementptr inbounds %struct.St4x2, %struct.St4x2* %in, i64 0, i32 0, i64 1
94  %2 = load i32, i32* %arrayidx.1, align 4
95  %arrayidx3.1 = getelementptr inbounds %struct.St4x2, %struct.St4x2* %ret, i64 0, i32 0, i64 1
96  store i32 %2, i32* %arrayidx3.1, align 4
97  ret void
98}
99
100define dso_local void @foo_St4x3(%struct.St4x3* nocapture noundef readonly byval(%struct.St4x3) align 4 %in, %struct.St4x3* nocapture noundef writeonly %ret) {
101  ; CHECK-LABEL: .visible .func foo_St4x3(
102  ; CHECK:               .param .align 4 .b8 foo_St4x3_param_0[12],
103  ; CHECK:               .param .b32 foo_St4x3_param_1
104  ; CHECK:       )
105  ; CHECK:       ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x3_param_1];
106  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x3_param_0];
107  ; CHECK:       st.u32  [[[R1]]], [[R2]];
108  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x3_param_0+4];
109  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
110  ; CHECK:       ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x3_param_0+8];
111  ; CHECK:       st.u32  [[[R1]]+8], [[R4]];
112  ; CHECK:       ret;
113  %arrayidx = getelementptr inbounds %struct.St4x3, %struct.St4x3* %in, i64 0, i32 0, i64 0
114  %1 = load i32, i32* %arrayidx, align 4
115  %arrayidx3 = getelementptr inbounds %struct.St4x3, %struct.St4x3* %ret, i64 0, i32 0, i64 0
116  store i32 %1, i32* %arrayidx3, align 4
117  %arrayidx.1 = getelementptr inbounds %struct.St4x3, %struct.St4x3* %in, i64 0, i32 0, i64 1
118  %2 = load i32, i32* %arrayidx.1, align 4
119  %arrayidx3.1 = getelementptr inbounds %struct.St4x3, %struct.St4x3* %ret, i64 0, i32 0, i64 1
120  store i32 %2, i32* %arrayidx3.1, align 4
121  %arrayidx.2 = getelementptr inbounds %struct.St4x3, %struct.St4x3* %in, i64 0, i32 0, i64 2
122  %3 = load i32, i32* %arrayidx.2, align 4
123  %arrayidx3.2 = getelementptr inbounds %struct.St4x3, %struct.St4x3* %ret, i64 0, i32 0, i64 2
124  store i32 %3, i32* %arrayidx3.2, align 4
125  ret void
126}
127
128define dso_local void @foo_St4x4(%struct.St4x4* nocapture noundef readonly byval(%struct.St4x4) align 4 %in, %struct.St4x4* nocapture noundef writeonly %ret) {
129  ; CHECK-LABEL: .visible .func foo_St4x4(
130  ; CHECK:               .param .align 4 .b8 foo_St4x4_param_0[16],
131  ; CHECK:               .param .b32 foo_St4x4_param_1
132  ; CHECK:       )
133  ; CHECK:       ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x4_param_1];
134  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x4_param_0];
135  ; CHECK:       st.u32  [[[R1]]], [[R2]];
136  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x4_param_0+4];
137  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
138  ; CHECK:       ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x4_param_0+8];
139  ; CHECK:       st.u32  [[[R1]]+8], [[R4]];
140  ; CHECK:       ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x4_param_0+12];
141  ; CHECK:       st.u32  [[[R1]]+12], [[R5]];
142  ; CHECK:       ret;
143  %arrayidx = getelementptr inbounds %struct.St4x4, %struct.St4x4* %in, i64 0, i32 0, i64 0
144  %1 = load i32, i32* %arrayidx, align 4
145  %arrayidx3 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %ret, i64 0, i32 0, i64 0
146  store i32 %1, i32* %arrayidx3, align 4
147  %arrayidx.1 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %in, i64 0, i32 0, i64 1
148  %2 = load i32, i32* %arrayidx.1, align 4
149  %arrayidx3.1 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %ret, i64 0, i32 0, i64 1
150  store i32 %2, i32* %arrayidx3.1, align 4
151  %arrayidx.2 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %in, i64 0, i32 0, i64 2
152  %3 = load i32, i32* %arrayidx.2, align 4
153  %arrayidx3.2 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %ret, i64 0, i32 0, i64 2
154  store i32 %3, i32* %arrayidx3.2, align 4
155  %arrayidx.3 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %in, i64 0, i32 0, i64 3
156  %4 = load i32, i32* %arrayidx.3, align 4
157  %arrayidx3.3 = getelementptr inbounds %struct.St4x4, %struct.St4x4* %ret, i64 0, i32 0, i64 3
158  store i32 %4, i32* %arrayidx3.3, align 4
159  ret void
160}
161
162define dso_local void @foo_St4x5(%struct.St4x5* nocapture noundef readonly byval(%struct.St4x5) align 4 %in, %struct.St4x5* nocapture noundef writeonly %ret) {
163  ; CHECK-LABEL: .visible .func foo_St4x5(
164  ; CHECK:               .param .align 4 .b8 foo_St4x5_param_0[20],
165  ; CHECK:               .param .b32 foo_St4x5_param_1
166  ; CHECK:       )
167  ; CHECK:       ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x5_param_1];
168  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x5_param_0];
169  ; CHECK:       st.u32  [[[R1]]], [[R2]];
170  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x5_param_0+4];
171  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
172  ; CHECK:       ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x5_param_0+8];
173  ; CHECK:       st.u32  [[[R1]]+8], [[R4]];
174  ; CHECK:       ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x5_param_0+12];
175  ; CHECK:       st.u32  [[[R1]]+12], [[R5]];
176  ; CHECK:       ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x5_param_0+16];
177  ; CHECK:       st.u32  [[[R1]]+16], [[R6]];
178  ; CHECK:       ret;
179  %arrayidx = getelementptr inbounds %struct.St4x5, %struct.St4x5* %in, i64 0, i32 0, i64 0
180  %1 = load i32, i32* %arrayidx, align 4
181  %arrayidx3 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %ret, i64 0, i32 0, i64 0
182  store i32 %1, i32* %arrayidx3, align 4
183  %arrayidx.1 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %in, i64 0, i32 0, i64 1
184  %2 = load i32, i32* %arrayidx.1, align 4
185  %arrayidx3.1 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %ret, i64 0, i32 0, i64 1
186  store i32 %2, i32* %arrayidx3.1, align 4
187  %arrayidx.2 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %in, i64 0, i32 0, i64 2
188  %3 = load i32, i32* %arrayidx.2, align 4
189  %arrayidx3.2 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %ret, i64 0, i32 0, i64 2
190  store i32 %3, i32* %arrayidx3.2, align 4
191  %arrayidx.3 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %in, i64 0, i32 0, i64 3
192  %4 = load i32, i32* %arrayidx.3, align 4
193  %arrayidx3.3 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %ret, i64 0, i32 0, i64 3
194  store i32 %4, i32* %arrayidx3.3, align 4
195  %arrayidx.4 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %in, i64 0, i32 0, i64 4
196  %5 = load i32, i32* %arrayidx.4, align 4
197  %arrayidx3.4 = getelementptr inbounds %struct.St4x5, %struct.St4x5* %ret, i64 0, i32 0, i64 4
198  store i32 %5, i32* %arrayidx3.4, align 4
199  ret void
200}
201
202define dso_local void @foo_St4x6(%struct.St4x6* nocapture noundef readonly byval(%struct.St4x6) align 4 %in, %struct.St4x6* nocapture noundef writeonly %ret) {
203  ; CHECK-LABEL: .visible .func foo_St4x6(
204  ; CHECK:               .param .align 4 .b8 foo_St4x6_param_0[24],
205  ; CHECK:               .param .b32 foo_St4x6_param_1
206  ; CHECK:       )
207  ; CHECK:       ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x6_param_1];
208  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x6_param_0];
209  ; CHECK:       st.u32  [[[R1]]], [[R2]];
210  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x6_param_0+4];
211  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
212  ; CHECK:       ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x6_param_0+8];
213  ; CHECK:       st.u32  [[[R1]]+8], [[R4]];
214  ; CHECK:       ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x6_param_0+12];
215  ; CHECK:       st.u32  [[[R1]]+12], [[R5]];
216  ; CHECK:       ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x6_param_0+16];
217  ; CHECK:       st.u32  [[[R1]]+16], [[R6]];
218  ; CHECK:       ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x6_param_0+20];
219  ; CHECK:       st.u32  [[[R1]]+20], [[R7]];
220  ; CHECK:       ret;
221  %arrayidx = getelementptr inbounds %struct.St4x6, %struct.St4x6* %in, i64 0, i32 0, i64 0
222  %1 = load i32, i32* %arrayidx, align 4
223  %arrayidx3 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %ret, i64 0, i32 0, i64 0
224  store i32 %1, i32* %arrayidx3, align 4
225  %arrayidx.1 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %in, i64 0, i32 0, i64 1
226  %2 = load i32, i32* %arrayidx.1, align 4
227  %arrayidx3.1 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %ret, i64 0, i32 0, i64 1
228  store i32 %2, i32* %arrayidx3.1, align 4
229  %arrayidx.2 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %in, i64 0, i32 0, i64 2
230  %3 = load i32, i32* %arrayidx.2, align 4
231  %arrayidx3.2 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %ret, i64 0, i32 0, i64 2
232  store i32 %3, i32* %arrayidx3.2, align 4
233  %arrayidx.3 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %in, i64 0, i32 0, i64 3
234  %4 = load i32, i32* %arrayidx.3, align 4
235  %arrayidx3.3 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %ret, i64 0, i32 0, i64 3
236  store i32 %4, i32* %arrayidx3.3, align 4
237  %arrayidx.4 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %in, i64 0, i32 0, i64 4
238  %5 = load i32, i32* %arrayidx.4, align 4
239  %arrayidx3.4 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %ret, i64 0, i32 0, i64 4
240  store i32 %5, i32* %arrayidx3.4, align 4
241  %arrayidx.5 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %in, i64 0, i32 0, i64 5
242  %6 = load i32, i32* %arrayidx.5, align 4
243  %arrayidx3.5 = getelementptr inbounds %struct.St4x6, %struct.St4x6* %ret, i64 0, i32 0, i64 5
244  store i32 %6, i32* %arrayidx3.5, align 4
245  ret void
246}
247
248define dso_local void @foo_St4x7(%struct.St4x7* nocapture noundef readonly byval(%struct.St4x7) align 4 %in, %struct.St4x7* nocapture noundef writeonly %ret) {
249  ; CHECK-LABEL: .visible .func foo_St4x7(
250  ; CHECK:               .param .align 4 .b8 foo_St4x7_param_0[28],
251  ; CHECK:               .param .b32 foo_St4x7_param_1
252  ; CHECK:       )
253  ; CHECK:       ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x7_param_1];
254  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x7_param_0];
255  ; CHECK:       st.u32  [[[R1]]], [[R2]];
256  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x7_param_0+4];
257  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
258  ; CHECK:       ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x7_param_0+8];
259  ; CHECK:       st.u32  [[[R1]]+8], [[R4]];
260  ; CHECK:       ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x7_param_0+12];
261  ; CHECK:       st.u32  [[[R1]]+12], [[R5]];
262  ; CHECK:       ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x7_param_0+16];
263  ; CHECK:       st.u32  [[[R1]]+16], [[R6]];
264  ; CHECK:       ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x7_param_0+20];
265  ; CHECK:       st.u32  [[[R1]]+20], [[R7]];
266  ; CHECK:       ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x7_param_0+24];
267  ; CHECK:       st.u32  [[[R1]]+24], [[R8]];
268  ; CHECK:       ret;
269  %arrayidx = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 0
270  %1 = load i32, i32* %arrayidx, align 4
271  %arrayidx3 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 0
272  store i32 %1, i32* %arrayidx3, align 4
273  %arrayidx.1 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 1
274  %2 = load i32, i32* %arrayidx.1, align 4
275  %arrayidx3.1 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 1
276  store i32 %2, i32* %arrayidx3.1, align 4
277  %arrayidx.2 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 2
278  %3 = load i32, i32* %arrayidx.2, align 4
279  %arrayidx3.2 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 2
280  store i32 %3, i32* %arrayidx3.2, align 4
281  %arrayidx.3 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 3
282  %4 = load i32, i32* %arrayidx.3, align 4
283  %arrayidx3.3 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 3
284  store i32 %4, i32* %arrayidx3.3, align 4
285  %arrayidx.4 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 4
286  %5 = load i32, i32* %arrayidx.4, align 4
287  %arrayidx3.4 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 4
288  store i32 %5, i32* %arrayidx3.4, align 4
289  %arrayidx.5 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 5
290  %6 = load i32, i32* %arrayidx.5, align 4
291  %arrayidx3.5 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 5
292  store i32 %6, i32* %arrayidx3.5, align 4
293  %arrayidx.6 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %in, i64 0, i32 0, i64 6
294  %7 = load i32, i32* %arrayidx.6, align 4
295  %arrayidx3.6 = getelementptr inbounds %struct.St4x7, %struct.St4x7* %ret, i64 0, i32 0, i64 6
296  store i32 %7, i32* %arrayidx3.6, align 4
297  ret void
298}
299
300define dso_local void @foo_St4x8(%struct.St4x8* nocapture noundef readonly byval(%struct.St4x8) align 4 %in, %struct.St4x8* nocapture noundef writeonly %ret) {
301  ; CHECK-LABEL: .visible .func foo_St4x8(
302  ; CHECK:               .param .align 4 .b8 foo_St4x8_param_0[32],
303  ; CHECK:               .param .b32 foo_St4x8_param_1
304  ; CHECK:       )
305  ; CHECK:       ld.param.u32 [[R1:%r[0-9]+]], [foo_St4x8_param_1];
306  ; CHECK:       ld.param.u32 [[R2:%r[0-9]+]], [foo_St4x8_param_0];
307  ; CHECK:       st.u32  [[[R1]]], [[R2]];
308  ; CHECK:       ld.param.u32 [[R3:%r[0-9]+]], [foo_St4x8_param_0+4];
309  ; CHECK:       st.u32  [[[R1]]+4], [[R3]];
310  ; CHECK:       ld.param.u32 [[R4:%r[0-9]+]], [foo_St4x8_param_0+8];
311  ; CHECK:       st.u32  [[[R1]]+8], [[R4]];
312  ; CHECK:       ld.param.u32 [[R5:%r[0-9]+]], [foo_St4x8_param_0+12];
313  ; CHECK:       st.u32  [[[R1]]+12], [[R5]];
314  ; CHECK:       ld.param.u32 [[R6:%r[0-9]+]], [foo_St4x8_param_0+16];
315  ; CHECK:       st.u32  [[[R1]]+16], [[R6]];
316  ; CHECK:       ld.param.u32 [[R7:%r[0-9]+]], [foo_St4x8_param_0+20];
317  ; CHECK:       st.u32  [[[R1]]+20], [[R7]];
318  ; CHECK:       ld.param.u32 [[R8:%r[0-9]+]], [foo_St4x8_param_0+24];
319  ; CHECK:       st.u32  [[[R1]]+24], [[R8]];
320  ; CHECK:       ld.param.u32 [[R9:%r[0-9]+]], [foo_St4x8_param_0+28];
321  ; CHECK:       st.u32  [[[R1]]+28], [[R9]];
322  ; CHECK:       ret;
323  %arrayidx = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 0
324  %1 = load i32, i32* %arrayidx, align 4
325  %arrayidx3 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 0
326  store i32 %1, i32* %arrayidx3, align 4
327  %arrayidx.1 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 1
328  %2 = load i32, i32* %arrayidx.1, align 4
329  %arrayidx3.1 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 1
330  store i32 %2, i32* %arrayidx3.1, align 4
331  %arrayidx.2 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 2
332  %3 = load i32, i32* %arrayidx.2, align 4
333  %arrayidx3.2 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 2
334  store i32 %3, i32* %arrayidx3.2, align 4
335  %arrayidx.3 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 3
336  %4 = load i32, i32* %arrayidx.3, align 4
337  %arrayidx3.3 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 3
338  store i32 %4, i32* %arrayidx3.3, align 4
339  %arrayidx.4 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 4
340  %5 = load i32, i32* %arrayidx.4, align 4
341  %arrayidx3.4 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 4
342  store i32 %5, i32* %arrayidx3.4, align 4
343  %arrayidx.5 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 5
344  %6 = load i32, i32* %arrayidx.5, align 4
345  %arrayidx3.5 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 5
346  store i32 %6, i32* %arrayidx3.5, align 4
347  %arrayidx.6 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 6
348  %7 = load i32, i32* %arrayidx.6, align 4
349  %arrayidx3.6 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 6
350  store i32 %7, i32* %arrayidx3.6, align 4
351  %arrayidx.7 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %in, i64 0, i32 0, i64 7
352  %8 = load i32, i32* %arrayidx.7, align 4
353  %arrayidx3.7 = getelementptr inbounds %struct.St4x8, %struct.St4x8* %ret, i64 0, i32 0, i64 7
354  store i32 %8, i32* %arrayidx3.7, align 4
355  ret void
356}
357
358define dso_local void @foo_St8x1(%struct.St8x1* nocapture noundef readonly byval(%struct.St8x1) align 8 %in, %struct.St8x1* nocapture noundef writeonly %ret) {
359  ; CHECK-LABEL: .visible .func foo_St8x1(
360  ; CHECK:               .param .align 8 .b8 foo_St8x1_param_0[8],
361  ; CHECK:               .param .b32 foo_St8x1_param_1
362  ; CHECK:       )
363  ; CHECK:       ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x1_param_1];
364  ; CHECK:       ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x1_param_0];
365  ; CHECK:       st.u64 [[[R1]]], [[RD1]];
366  ; CHECK:       ret;
367  %arrayidx = getelementptr inbounds %struct.St8x1, %struct.St8x1* %in, i64 0, i32 0, i64 0
368  %1 = load i64, i64* %arrayidx, align 8
369  %arrayidx3 = getelementptr inbounds %struct.St8x1, %struct.St8x1* %ret, i64 0, i32 0, i64 0
370  store i64 %1, i64* %arrayidx3, align 8
371  ret void
372}
373
374define dso_local void @foo_St8x2(%struct.St8x2* nocapture noundef readonly byval(%struct.St8x2) align 8 %in, %struct.St8x2* nocapture noundef writeonly %ret) {
375  ; CHECK-LABEL: .visible .func foo_St8x2(
376  ; CHECK:               .param .align 8 .b8 foo_St8x2_param_0[16],
377  ; CHECK:               .param .b32 foo_St8x2_param_1
378  ; CHECK:       )
379  ; CHECK:       ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x2_param_1];
380  ; CHECK:       ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x2_param_0];
381  ; CHECK:       st.u64 [[[R1]]], [[RD1]];
382  ; CHECK:       ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x2_param_0+8];
383  ; CHECK:       st.u64 [[[R1]]+8], [[RD2]];
384  ; CHECK:       ret;
385  %arrayidx = getelementptr inbounds %struct.St8x2, %struct.St8x2* %in, i64 0, i32 0, i64 0
386  %1 = load i64, i64* %arrayidx, align 8
387  %arrayidx3 = getelementptr inbounds %struct.St8x2, %struct.St8x2* %ret, i64 0, i32 0, i64 0
388  store i64 %1, i64* %arrayidx3, align 8
389  %arrayidx.1 = getelementptr inbounds %struct.St8x2, %struct.St8x2* %in, i64 0, i32 0, i64 1
390  %2 = load i64, i64* %arrayidx.1, align 8
391  %arrayidx3.1 = getelementptr inbounds %struct.St8x2, %struct.St8x2* %ret, i64 0, i32 0, i64 1
392  store i64 %2, i64* %arrayidx3.1, align 8
393  ret void
394}
395
396define dso_local void @foo_St8x3(%struct.St8x3* nocapture noundef readonly byval(%struct.St8x3) align 8 %in, %struct.St8x3* nocapture noundef writeonly %ret) {
397  ; CHECK-LABEL: .visible .func foo_St8x3(
398  ; CHECK:               .param .align 8 .b8 foo_St8x3_param_0[24],
399  ; CHECK:               .param .b32 foo_St8x3_param_1
400  ; CHECK:       )
401  ; CHECK:       ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x3_param_1];
402  ; CHECK:       ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x3_param_0];
403  ; CHECK:       st.u64 [[[R1]]], [[RD1]];
404  ; CHECK:       ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x3_param_0+8];
405  ; CHECK:       st.u64 [[[R1]]+8], [[RD2]];
406  ; CHECK:       ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x3_param_0+16];
407  ; CHECK:       st.u64 [[[R1]]+16], [[RD3]];
408  ; CHECK:       ret;
409  %arrayidx = getelementptr inbounds %struct.St8x3, %struct.St8x3* %in, i64 0, i32 0, i64 0
410  %1 = load i64, i64* %arrayidx, align 8
411  %arrayidx3 = getelementptr inbounds %struct.St8x3, %struct.St8x3* %ret, i64 0, i32 0, i64 0
412  store i64 %1, i64* %arrayidx3, align 8
413  %arrayidx.1 = getelementptr inbounds %struct.St8x3, %struct.St8x3* %in, i64 0, i32 0, i64 1
414  %2 = load i64, i64* %arrayidx.1, align 8
415  %arrayidx3.1 = getelementptr inbounds %struct.St8x3, %struct.St8x3* %ret, i64 0, i32 0, i64 1
416  store i64 %2, i64* %arrayidx3.1, align 8
417  %arrayidx.2 = getelementptr inbounds %struct.St8x3, %struct.St8x3* %in, i64 0, i32 0, i64 2
418  %3 = load i64, i64* %arrayidx.2, align 8
419  %arrayidx3.2 = getelementptr inbounds %struct.St8x3, %struct.St8x3* %ret, i64 0, i32 0, i64 2
420  store i64 %3, i64* %arrayidx3.2, align 8
421  ret void
422}
423
424define dso_local void @foo_St8x4(%struct.St8x4* nocapture noundef readonly byval(%struct.St8x4) align 8 %in, %struct.St8x4* nocapture noundef writeonly %ret) {
425  ; CHECK-LABEL: .visible .func foo_St8x4(
426  ; CHECK:               .param .align 8 .b8 foo_St8x4_param_0[32],
427  ; CHECK:               .param .b32 foo_St8x4_param_1
428  ; CHECK:       )
429  ; CHECK:       ld.param.u32 [[R1:%r[0-9]+]], [foo_St8x4_param_1];
430  ; CHECK:       ld.param.u64 [[RD1:%rd[0-9]+]], [foo_St8x4_param_0];
431  ; CHECK:       st.u64 [[[R1]]], [[RD1]];
432  ; CHECK:       ld.param.u64 [[RD2:%rd[0-9]+]], [foo_St8x4_param_0+8];
433  ; CHECK:       st.u64 [[[R1]]+8], [[RD2]];
434  ; CHECK:       ld.param.u64 [[RD3:%rd[0-9]+]], [foo_St8x4_param_0+16];
435  ; CHECK:       st.u64 [[[R1]]+16], [[RD3]];
436  ; CHECK:       ld.param.u64 [[RD4:%rd[0-9]+]], [foo_St8x4_param_0+24];
437  ; CHECK:       st.u64 [[[R1]]+24], [[RD4]];
438  ; CHECK:       ret;
439  %arrayidx = getelementptr inbounds %struct.St8x4, %struct.St8x4* %in, i64 0, i32 0, i64 0
440  %1 = load i64, i64* %arrayidx, align 8
441  %arrayidx3 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %ret, i64 0, i32 0, i64 0
442  store i64 %1, i64* %arrayidx3, align 8
443  %arrayidx.1 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %in, i64 0, i32 0, i64 1
444  %2 = load i64, i64* %arrayidx.1, align 8
445  %arrayidx3.1 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %ret, i64 0, i32 0, i64 1
446  store i64 %2, i64* %arrayidx3.1, align 8
447  %arrayidx.2 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %in, i64 0, i32 0, i64 2
448  %3 = load i64, i64* %arrayidx.2, align 8
449  %arrayidx3.2 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %ret, i64 0, i32 0, i64 2
450  store i64 %3, i64* %arrayidx3.2, align 8
451  %arrayidx.3 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %in, i64 0, i32 0, i64 3
452  %4 = load i64, i64* %arrayidx.3, align 8
453  %arrayidx3.3 = getelementptr inbounds %struct.St8x4, %struct.St8x4* %ret, i64 0, i32 0, i64 3
454  store i64 %4, i64* %arrayidx3.3, align 8
455  ret void
456}
457