; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s

; FIXME: Broken on evergreen
; FIXME: For some reason the 8 and 16 vectors are being stored as
; individual elements instead of 128-bit stores.


; Insert the constant 5.0 at constant index 0 of the <4 x float> argument %a
; and store the result. 0x40a00000 is the IEEE-754 single-precision encoding
; of 5.0. The checks require the constant to land in the lowest register of
; the tuple written by the 128-bit store.
;
; FIXME: Why is the constant moved into the intermediate register and
; not just directly into the vector component?

; GCN-LABEL: {{^}}insertelement_v4f32_0:
; GCN: s_load_dwordx4
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: s_mov_b32 [[CONSTREG:s[0-9]+]], 0x40a00000
; GCN-DAG: v_mov_b32_e32 v[[LOW_REG:[0-9]+]], [[CONSTREG]]
; GCN: buffer_store_dwordx4 v{{\[}}[[LOW_REG]]:
define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Insert 5.0 at constant index 1 of a <4 x float> and store the result.
; Only the function label is checked; no instruction-level expectations.
; GCN-LABEL: {{^}}insertelement_v4f32_1:
define void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 1
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Insert 5.0 at constant index 2 of a <4 x float> and store the result.
; Only the function label is checked; no instruction-level expectations.
; GCN-LABEL: {{^}}insertelement_v4f32_2:
define void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 2
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Insert 5.0 at constant index 3 (the last lane) of a <4 x float> and store
; the result. Only the function label is checked.
; GCN-LABEL: {{^}}insertelement_v4f32_3:
define void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 3
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Integer variant: insert the constant 999 at constant index 0 of a
; <4 x i32> and store the result. Only the function label is checked.
; GCN-LABEL: {{^}}insertelement_v4i32_0:
define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) nounwind {
  %vecins = insertelement <4 x i32> %a, i32 999, i32 0
  store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; Three-element vector: insert 5.0 at constant index 1 of a <3 x float> and
; store the result. Only the function label is checked.
; GCN-LABEL: {{^}}insertelement_v3f32_1:
define void @insertelement_v3f32_1(<3 x float> addrspace(1)* %out, <3 x float> %a) nounwind {
  %vecins = insertelement <3 x float> %a, float 5.000000e+00, i32 1
  store <3 x float> %vecins, <3 x float> addrspace(1)* %out, align 16
  ret void
}

; Three-element vector: insert 5.0 at constant index 2 (the last lane) of a
; <3 x float> and store the result. Only the function label is checked.
; GCN-LABEL: {{^}}insertelement_v3f32_2:
define void @insertelement_v3f32_2(<3 x float> addrspace(1)* %out, <3 x float> %a) nounwind {
  %vecins = insertelement <3 x float> %a, float 5.000000e+00, i32 2
  store <3 x float> %vecins, <3 x float> addrspace(1)* %out, align 16
  ret void
}

; Insert 5.0 at constant index 3 of a <3 x float>. Index 3 is past the last
; lane (valid indices are 0-2), so the inserted vector is not a well-defined
; value. Only the function label is checked.
; NOTE(review): presumably kept as a does-not-crash case - confirm.
; GCN-LABEL: {{^}}insertelement_v3f32_3:
define void @insertelement_v3f32_3(<3 x float> addrspace(1)* %out, <3 x float> %a) nounwind {
  %vecins = insertelement <3 x float> %a, float 5.000000e+00, i32 3
  store <3 x float> %vecins, <3 x float> addrspace(1)* %out, align 16
  ret void
}

; Load a <4 x i32> through an undef addrspace(2) pointer, overwrite lane 0
; with 0, and pass the result as the <4 x i32> operand of the gather4
; intrinsic. The negative check forbids v_readfirstlane anywhere in the
; output of this function.
; GCN-LABEL: {{^}}insertelement_to_sgpr:
; GCN-NOT: v_readfirstlane
define amdgpu_ps <4 x float> @insertelement_to_sgpr() nounwind {
  %tmp = load <4 x i32>, <4 x i32> addrspace(2)* undef
  %tmp1 = insertelement <4 x i32> %tmp, i32 0, i32 0
  %tmp2 = call <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32> undef, <8 x i32> undef, <4 x i32> %tmp1, i32 8, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0)
  ret <4 x float> %tmp2
}

; Insert 5.0 (0x40a00000) into a <2 x float> at the run-time index %b.
; Expected: the constant is materialized in a VGPR, written with
; v_movreld_b32, and the 64-bit store starts at the movreld base register.
; GCN-LABEL: {{^}}dynamic_insertelement_v2f32:
; GCN: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
; GCN: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; GCN: buffer_store_dwordx2 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 %b
  store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 8
  ret void
}

; Insert 5.0 (0x40a00000) into a <3 x float> at the run-time index %b.
; The three-element result is expected to be written as one 64-bit store
; (starting at the movreld base register) plus one 32-bit store, in either
; order.
; GCN-LABEL: {{^}}dynamic_insertelement_v3f32:
; GCN: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
; GCN: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; GCN-DAG: buffer_store_dwordx2 {{v\[}}[[LOW_RESULT_REG]]:
; GCN-DAG: buffer_store_dword v
define void @dynamic_insertelement_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <3 x float> %a, float 5.000000e+00, i32 %b
  store <3 x float> %vecins, <3 x float> addrspace(1)* %out, align 16
  ret void
}

; Insert 5.0 (0x40a00000) into a <4 x float> at the run-time index %b.
; Expected: v_movreld_b32 of the constant, then a single 128-bit store whose
; register tuple starts at the movreld base register.
; GCN-LABEL: {{^}}dynamic_insertelement_v4f32:
; GCN: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
; GCN: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
; GCN: buffer_store_dwordx4 {{v\[}}[[LOW_RESULT_REG]]:
define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}

; Insert 5.0 into a <8 x float> at the run-time index %b.
; Expected: one v_movreld_b32 followed by two 128-bit stores.
; GCN-LABEL: {{^}}dynamic_insertelement_v8f32:
; GCN: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
  store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
  ret void
}

; Insert 5.0 into a <16 x float> at the run-time index %b.
; Expected: one v_movreld_b32 followed by four 128-bit stores.
; GCN-LABEL: {{^}}dynamic_insertelement_v16f32:
; GCN: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
  store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
  ret void
}

; Insert the integer constant 5 into a <2 x i32> at the run-time index %b.
; Expected: a v_movreld_b32 and a 64-bit store.
; GCN-LABEL: {{^}}dynamic_insertelement_v2i32:
; GCN: v_movreld_b32
; GCN: buffer_store_dwordx2
define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i32> %a, i32 5, i32 %b
  store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 8
  ret void
}

; Insert the integer constant 5 into a <3 x i32> at the run-time index %b.
; The constant is expected as an inline operand of v_movreld_b32; the result
; is written as a 64-bit store (starting at the movreld base register) plus
; a 32-bit store, in either order.
; GCN-LABEL: {{^}}dynamic_insertelement_v3i32:
; GCN: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], 5
; GCN-DAG: buffer_store_dwordx2 {{v\[}}[[LOW_RESULT_REG]]:
; GCN-DAG: buffer_store_dword v
define void @dynamic_insertelement_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <3 x i32> %a, i32 5, i32 %b
  store <3 x i32> %vecins, <3 x i32> addrspace(1)* %out, align 16
  ret void
}

; Insert the run-time value %val into a <4 x i32> at the run-time index %b.
; Expected: a scalar load into an SGPR, a copy to a VGPR, v_movreld_b32 of
; that VGPR, and a 128-bit store.
; The two accepted load offsets are the same location in dwords and in bytes
; (0x12 * 4 = 0x48); presumably one form per tested target - TODO confirm.
; GCN-LABEL: {{^}}dynamic_insertelement_v4i32:
; GCN: s_load_dword [[SVAL:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0x12|0x48}}
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[SVAL]]
; GCN: v_movreld_b32_e32 v{{[0-9]+}}, [[VVAL]]
; GCN: buffer_store_dwordx4
define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b, i32 %val) nounwind {
  %vecins = insertelement <4 x i32> %a, i32 %val, i32 %b
  store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
  ret void
}

; Insert the integer constant 5 into a <8 x i32> at the run-time index %b.
; Expected: a v_movreld_b32 followed by two 128-bit stores.
; GCN-LABEL: {{^}}dynamic_insertelement_v8i32:
; GCN: v_movreld_b32
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x i32> %a, i32 5, i32 %b
  store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
  ret void
}

; Insert the integer constant 5 into a <16 x i32> at the run-time index %b.
; Expected: a v_movreld_b32 followed by four 128-bit stores.
; GCN-LABEL: {{^}}dynamic_insertelement_v16i32:
; GCN: v_movreld_b32
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x i32> %a, i32 5, i32 %b
  store <16 x i32> %vecins, <16 x i32> addrspace(1)* %out, align 64
  ret void
}

; Insert the i16 constant 5 into a <2 x i16> at the run-time index %b.
; Only the function label is checked; no instruction-level expectations.
; GCN-LABEL: {{^}}dynamic_insertelement_v2i16:
define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i16> %a, i16 5, i32 %b
  store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out, align 8
  ret void
}

; Insert the i16 constant 5 into a <3 x i16> at the run-time index %b.
; Only the function label is checked; no instruction-level expectations.
; GCN-LABEL: {{^}}dynamic_insertelement_v3i16:
define void @dynamic_insertelement_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, i32 %b) nounwind {
  %vecins = insertelement <3 x i16> %a, i16 5, i32 %b
  store <3 x i16> %vecins, <3 x i16> addrspace(1)* %out, align 8
  ret void
}

; Insert the i16 constant 5 into a <4 x i16> at the run-time index %b.
; The expected sequence is: four 16-bit loads, four 16-bit stores at byte
; offsets 0/2/4/6, one further 16-bit store at a register-supplied offset
; (presumably the inserted element at the dynamic index), a wait, four
; 16-bit reloads, and a final 64-bit store of the result.
; GCN-LABEL: {{^}}dynamic_insertelement_v4i16:
; GCN: buffer_load_ushort v{{[0-9]+}}, off
; GCN: buffer_load_ushort v{{[0-9]+}}, off
; GCN: buffer_load_ushort v{{[0-9]+}}, off
; GCN: buffer_load_ushort v{{[0-9]+}}, off

; GCN: v_mov_b32_e32 {{v[0-9]+}}, 0{{$}}
; GCN: v_mov_b32_e32 [[BASE_FI:v[0-9]+]], 0{{$}}

; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:6
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:4
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2
; GCN-DAG: buffer_store_short v{{[0-9]+}}, [[BASE_FI]], s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
; GCN: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}

; GCN: s_waitcnt

; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort
; GCN: buffer_load_ushort

; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off
define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i16> %a, i16 5, i32 %b
  store <4 x i16> %vecins, <4 x i16> addrspace(1)* %out, align 8
  ret void
}

; Insert the i8 constant 5 into a <2 x i8> at the run-time index %b.
; The expected sequence is: two byte loads, two byte stores (offsets 0 and
; 1), one further byte store at a register-supplied offset (presumably the
; inserted element), two byte reloads, and a final 16-bit store.
; GCN-LABEL: {{^}}dynamic_insertelement_v2i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
; GCN: buffer_load_ubyte v{{[0-9]+}}, off

; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}

; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}

; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte

; GCN: buffer_store_short v{{[0-9]+}}, off
define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i8> %a, i8 5, i32 %b
  store <2 x i8> %vecins, <2 x i8> addrspace(1)* %out, align 8
  ret void
}

; Insert the i8 constant 5 into a <3 x i8> at the run-time index %b.
; The expected sequence is: three byte loads, three byte stores (offsets 0,
; 1 and 2), one further byte store at a register-supplied offset (presumably
; the inserted element), three byte reloads, and a final byte store plus
; 16-bit store in either order.
; GCN-LABEL: {{^}}dynamic_insertelement_v3i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
; GCN: buffer_load_ubyte v{{[0-9]+}}, off

; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}

; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}

; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte

; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off
; GCN-DAG: buffer_store_short v{{[0-9]+}}, off
define void @dynamic_insertelement_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <3 x i8> %a, i8 5, i32 %b
  store <3 x i8> %vecins, <3 x i8> addrspace(1)* %out, align 4
  ret void
}

; Insert the i8 constant 5 into a <4 x i8> at the run-time index %b.
; The expected sequence is: four byte loads, four byte stores (offsets 0-3),
; one further byte store at a register-supplied offset (presumably the
; inserted element), four byte reloads, and a final 32-bit store.
; GCN-LABEL: {{^}}dynamic_insertelement_v4i8:
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
; GCN: buffer_load_ubyte v{{[0-9]+}}, off
; GCN: buffer_load_ubyte v{{[0-9]+}}, off

; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:3
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:2
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offset:1
; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}

; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}

; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte
; GCN: buffer_load_ubyte

; GCN: buffer_store_dword v{{[0-9]+}}, off
define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i8> %a, i8 5, i32 %b
  store <4 x i8> %vecins, <4 x i8> addrspace(1)* %out, align 4
  ret void
}

; Insert the i8 constant 5 into a <8 x i8> at the run-time index %b.
; Only the function label is checked; no instruction-level expectations.
; GCN-LABEL: {{^}}dynamic_insertelement_v8i8:
define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x i8> %a, i8 5, i32 %b
  store <8 x i8> %vecins, <8 x i8> addrspace(1)* %out, align 8
  ret void
}

; Insert the i8 constant 5 into a <16 x i8> at the run-time index %b.
; Only the function label is checked; no instruction-level expectations.
; GCN-LABEL: {{^}}dynamic_insertelement_v16i8:
define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x i8> %a, i8 5, i32 %b
  store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
  ret void
}

; This test requires handling INSERT_SUBREG in SIFixSGPRCopies.  Check that
; the compiler doesn't crash.
;
; Lane 0 of the vector is set in the entry block; lane 1 is set in one of
; two successor blocks (from %in[0] when %a == 0, otherwise from %in[1]),
; and the two partial results are merged by a phi before the store.
; GCN-LABEL: {{^}}insert_split_bb:
define void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {
entry:
  %0 = insertelement <2 x i32> undef, i32 %a, i32 0
  %1 = icmp eq i32 %a, 0
  br i1 %1, label %if, label %else

if:
  %2 = load i32, i32 addrspace(1)* %in
  %3 = insertelement <2 x i32> %0, i32 %2, i32 1
  br label %endif

else:
  %4 = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %5 = load i32, i32 addrspace(1)* %4
  %6 = insertelement <2 x i32> %0, i32 %5, i32 1
  br label %endif

endif:
  %7 = phi <2 x i32> [%3, %if], [%6, %else]
  store <2 x i32> %7, <2 x i32> addrspace(1)* %out
  ret void
}

; Insert the double constant 8.0 into a <2 x double> at the run-time index
; %b. The 64-bit element is written as two 32-bit halves: 8.0 encodes as
; 0x4020000000000000, so the low half is 0 and the high half is 0x40200000.
; The index is doubled (shift left by 1) before being copied to m0 because
; each element spans two 32-bit registers.
; The accepted index-load offsets are the same location in dwords and bytes
; (0x11 * 4 = 0x44).
; GCN-LABEL: {{^}}dynamic_insertelement_v2f64:
; GCN-DAG: s_load_dwordx4 s{{\[}}[[A_ELT0:[0-9]+]]:[[A_ELT3:[0-9]+]]{{\]}}
; GCN-DAG: s_load_dword [[IDX:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0x11|0x44}}{{$}}

; GCN-DAG: s_lshl_b32 [[SCALEDIDX:s[0-9]+]], [[IDX]], 1{{$}}

; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
; GCN-DAG: v_mov_b32_e32 [[ELT1:v[0-9]+]], 0x40200000

; GCN-DAG: s_mov_b32 m0, [[SCALEDIDX]]
; GCN: v_movreld_b32_e32 v{{[0-9]+}}, 0

; Increment to next element folded into base register, but FileCheck
; can't do math expressions

; FIXME: Should be able to manipulate m0 directly instead of s_lshl_b32 + copy to m0

; GCN: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT1]]

; GCN: buffer_store_dwordx4
; GCN: s_endpgm
define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x double> %a, double 8.0, i32 %b
  store <2 x double> %vecins, <2 x double> addrspace(1)* %out, align 16
  ret void
}

; Insert the i64 constant 5 into a <2 x i64> at the run-time index %b.
; The 64-bit element is written as two 32-bit halves (5 and 0) with two
; v_movreld_b32 instructions in either order, then stored with one 128-bit
; store.
; GCN-LABEL: {{^}}dynamic_insertelement_v2i64:

; GCN-DAG: v_movreld_b32_e32 v{{[0-9]+}}, 5
; GCN-DAG: v_movreld_b32_e32 v{{[0-9]+}}, 0

; GCN: buffer_store_dwordx4
; GCN: s_endpgm
define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i64> %a, i64 5, i32 %b
  store <2 x i64> %vecins, <2 x i64> addrspace(1)* %out, align 8
  ret void
}

; Insert the i64 constant 5 into a <3 x i64> at the run-time index %b.
; Only the function label is checked; no instruction-level expectations.
; GCN-LABEL: {{^}}dynamic_insertelement_v3i64:
define void @dynamic_insertelement_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %a, i32 %b) nounwind {
  %vecins = insertelement <3 x i64> %a, i64 5, i32 %b
  store <3 x i64> %vecins, <3 x i64> addrspace(1)* %out, align 32
  ret void
}

; Insert the double constant 8.0 into a <4 x double> at the run-time index
; %b. The checks expect the vector to be written out in two 128-bit pieces,
; the new element to be written with a 64-bit store at a register-supplied
; offset, the two pieces to be reloaded, and the result to be stored with
; two 128-bit stores. The reported scratch size must be 64 bytes.
;
; FIXME: Should be able to do without stack access. The used stack
; space is also 2x what should be required.

; GCN-LABEL: {{^}}dynamic_insertelement_v4f64:
; GCN: SCRATCH_RSRC_DWORD

; Stack store

; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}}
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:16{{$}}

; Write element
; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}

; Stack reload
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}

; Store result
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: s_endpgm
; GCN: ScratchSize: 64

define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x double> %a, double 8.0, i32 %b
  store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16
  ret void
}

; Insert the double constant 8.0 into a <8 x double> at the run-time index
; %b. The checks expect four 128-bit stores at offsets 0/16/32/48, a 64-bit
; store of the new element at a register-supplied offset, 128-bit reloads,
; and four 128-bit stores of the result. The reported scratch size must be
; 128 bytes.
; NOTE(review): the four reload patterns below repeat "offen offset:16" and
; "offen" twice each instead of naming four distinct offsets - confirm this
; is intended.
; GCN-LABEL: {{^}}dynamic_insertelement_v8f64:
; GCN: SCRATCH_RSRC_DWORD

; FIXME: Should be able to eliminate this?

; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:16{{$}}
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:32{{$}}
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, off, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offset:48{{$}}

; GCN: v_mov_b32_e32 [[BASE_FI0:v[0-9]+]], 0{{$}}
; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, [[BASE_FI0]], s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}

; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}

; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, [[BASE_FI0]], s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, [[BASE_FI0]], s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, [[BASE_FI0]], s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, [[BASE_FI0]], s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}

; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: buffer_store_dwordx4
; GCN: s_endpgm
; GCN: ScratchSize: 128
define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x double> %a, double 8.0, i32 %b
  store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16
  ret void
}

; Declaration of the gather4 intrinsic called by @insertelement_to_sgpr.
declare <4 x float> @llvm.SI.gather4.lz.v2i32(<2 x i32>, <8 x i32>, <4 x i32>, i32, i32, i32, i32, i32, i32, i32, i32) nounwind readnone
