; RUN: llc -verify-machineinstrs -march=amdgcn -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
; RUN: llc -verify-machineinstrs -march=amdgcn -mcpu=tonga -mattr=+max-private-element-size-16 < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
3
; FIXME: Broken on evergreen.
; FIXME: For some reason the 8- and 16-element vectors are being stored as
; individual elements instead of as 128-bit stores.
7
8
; FIXME: Why is the constant moved into an intermediate register and
; not written directly into the vector component?
11
12; GCN-LABEL: {{^}}insertelement_v4f32_0:
13; GCN: s_load_dwordx4 s{{\[}}[[LOW_REG:[0-9]+]]:
14; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
15; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
16; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
17; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
18; GCN-DAG: v_mov_b32_e32 [[CONSTREG:v[0-9]+]], 0x40a00000
19; GCN-DAG: v_mov_b32_e32 v[[LOW_REG]], [[CONSTREG]]
20; GCN: buffer_store_dwordx4 v{{\[}}[[LOW_REG]]:
; Replaces lane 0 of the <4 x float> argument %a with the constant 5.0
; (0x40a00000 as an f32 bit pattern) and stores the whole vector to %out
; with 16-byte alignment.
define void @insertelement_v4f32_0(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 0
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}
26
27; GCN-LABEL: {{^}}insertelement_v4f32_1:
; Replaces lane 1 of the <4 x float> argument %a with the constant 5.0 and
; stores the whole vector to %out with 16-byte alignment.
define void @insertelement_v4f32_1(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 1
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}
33
34; GCN-LABEL: {{^}}insertelement_v4f32_2:
; Replaces lane 2 of the <4 x float> argument %a with the constant 5.0 and
; stores the whole vector to %out with 16-byte alignment.
define void @insertelement_v4f32_2(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 2
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}
40
41; GCN-LABEL: {{^}}insertelement_v4f32_3:
; Replaces lane 3 (the last lane) of the <4 x float> argument %a with the
; constant 5.0 and stores the whole vector to %out with 16-byte alignment.
define void @insertelement_v4f32_3(<4 x float> addrspace(1)* %out, <4 x float> %a) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 3
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}
47
48; GCN-LABEL: {{^}}insertelement_v4i32_0:
; Integer counterpart of the v4f32 lane-0 case: replaces lane 0 of the
; <4 x i32> argument %a with the constant 999 and stores the vector to %out
; with 16-byte alignment.
define void @insertelement_v4i32_0(<4 x i32> addrspace(1)* %out, <4 x i32> %a) nounwind {
  %vecins = insertelement <4 x i32> %a, i32 999, i32 0
  store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
  ret void
}
54
55; GCN-LABEL: {{^}}insertelement_v3f32_1:
; Replaces lane 1 of the three-element vector %a with the constant 5.0 and
; stores the <3 x float> result to %out with 16-byte alignment.
define void @insertelement_v3f32_1(<3 x float> addrspace(1)* %out, <3 x float> %a) nounwind {
  %vecins = insertelement <3 x float> %a, float 5.000000e+00, i32 1
  store <3 x float> %vecins, <3 x float> addrspace(1)* %out, align 16
  ret void
}
61
62; GCN-LABEL: {{^}}insertelement_v3f32_2:
; Replaces lane 2 (the last lane) of the three-element vector %a with the
; constant 5.0 and stores the <3 x float> result to %out with 16-byte
; alignment.
define void @insertelement_v3f32_2(<3 x float> addrspace(1)* %out, <3 x float> %a) nounwind {
  %vecins = insertelement <3 x float> %a, float 5.000000e+00, i32 2
  store <3 x float> %vecins, <3 x float> addrspace(1)* %out, align 16
  ret void
}
68
69; GCN-LABEL: {{^}}insertelement_v3f32_3:
; Inserts the constant 5.0 at index 3 of the three-element vector %a and
; stores the <3 x float> result to %out with 16-byte alignment.
; NOTE(review): index 3 is one past the last lane of <3 x float>, so the
; LangRef does not define the resulting value. Presumably this is kept on
; purpose as a does-not-crash case (only the function label is checked) --
; confirm before changing the index.
define void @insertelement_v3f32_3(<3 x float> addrspace(1)* %out, <3 x float> %a) nounwind {
  %vecins = insertelement <3 x float> %a, float 5.000000e+00, i32 3
  store <3 x float> %vecins, <3 x float> addrspace(1)* %out, align 16
  ret void
}
75
76; GCN-LABEL: {{^}}dynamic_insertelement_v2f32:
77; GCN: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
78; GCN: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
79; GCN: buffer_store_dwordx2 {{v\[}}[[LOW_RESULT_REG]]:
; Writes the constant 5.0 into the lane of %a selected at run time by %b
; and stores the <2 x float> result to %out with 8-byte alignment.
define void @dynamic_insertelement_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x float> %a, float 5.000000e+00, i32 %b
  store <2 x float> %vecins, <2 x float> addrspace(1)* %out, align 8
  ret void
}
85
86; GCN-LABEL: {{^}}dynamic_insertelement_v3f32:
87; GCN: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
88; GCN: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
89; GCN-DAG: buffer_store_dwordx2 {{v\[}}[[LOW_RESULT_REG]]:
90; GCN-DAG: buffer_store_dword v
; Writes the constant 5.0 into the lane of %a selected at run time by %b
; and stores the <3 x float> result to %out with 16-byte alignment.
define void @dynamic_insertelement_v3f32(<3 x float> addrspace(1)* %out, <3 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <3 x float> %a, float 5.000000e+00, i32 %b
  store <3 x float> %vecins, <3 x float> addrspace(1)* %out, align 16
  ret void
}
96
97; GCN-LABEL: {{^}}dynamic_insertelement_v4f32:
98; GCN: v_mov_b32_e32 [[CONST:v[0-9]+]], 0x40a00000
99; GCN: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
100; GCN: buffer_store_dwordx4 {{v\[}}[[LOW_RESULT_REG]]:
; Writes the constant 5.0 into the lane of %a selected at run time by %b
; and stores the <4 x float> result to %out with 16-byte alignment.
define void @dynamic_insertelement_v4f32(<4 x float> addrspace(1)* %out, <4 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x float> %a, float 5.000000e+00, i32 %b
  store <4 x float> %vecins, <4 x float> addrspace(1)* %out, align 16
  ret void
}
106
107; GCN-LABEL: {{^}}dynamic_insertelement_v8f32:
108; GCN: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
109; GCN: buffer_store_dwordx4
110; GCN: buffer_store_dwordx4
; Writes the constant 5.0 into the lane of %a selected at run time by %b
; and stores the <8 x float> result to %out with 32-byte alignment.
define void @dynamic_insertelement_v8f32(<8 x float> addrspace(1)* %out, <8 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x float> %a, float 5.000000e+00, i32 %b
  store <8 x float> %vecins, <8 x float> addrspace(1)* %out, align 32
  ret void
}
116
117; GCN-LABEL: {{^}}dynamic_insertelement_v16f32:
118; GCN: v_movreld_b32_e32 v{{[0-9]+}}, v{{[0-9]+}}
119; GCN: buffer_store_dwordx4
120; GCN: buffer_store_dwordx4
121; GCN: buffer_store_dwordx4
122; GCN: buffer_store_dwordx4
; Writes the constant 5.0 into the lane of %a selected at run time by %b
; and stores the <16 x float> result to %out with 64-byte alignment.
define void @dynamic_insertelement_v16f32(<16 x float> addrspace(1)* %out, <16 x float> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x float> %a, float 5.000000e+00, i32 %b
  store <16 x float> %vecins, <16 x float> addrspace(1)* %out, align 64
  ret void
}
128
129; GCN-LABEL: {{^}}dynamic_insertelement_v2i32:
130; GCN: v_movreld_b32
131; GCN: buffer_store_dwordx2
; Writes the constant 5 into the lane of %a selected at run time by %b and
; stores the <2 x i32> result to %out with 8-byte alignment.
define void @dynamic_insertelement_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i32> %a, i32 5, i32 %b
  store <2 x i32> %vecins, <2 x i32> addrspace(1)* %out, align 8
  ret void
}
137
138; GCN-LABEL: {{^}}dynamic_insertelement_v3i32:
139; GCN: v_mov_b32_e32 [[CONST:v[0-9]+]], 5
140; GCN: v_movreld_b32_e32 v[[LOW_RESULT_REG:[0-9]+]], [[CONST]]
141; GCN-DAG: buffer_store_dwordx2 {{v\[}}[[LOW_RESULT_REG]]:
142; GCN-DAG: buffer_store_dword v
; Writes the constant 5 into the lane of %a selected at run time by %b and
; stores the <3 x i32> result to %out with 16-byte alignment.
define void @dynamic_insertelement_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <3 x i32> %a, i32 5, i32 %b
  store <3 x i32> %vecins, <3 x i32> addrspace(1)* %out, align 16
  ret void
}
148
149; GCN-LABEL: {{^}}dynamic_insertelement_v4i32:
150; GCN: v_movreld_b32
151; GCN: buffer_store_dwordx4
; Writes the constant 5 into the lane of %a selected at run time by %b and
; stores the <4 x i32> result to %out with 16-byte alignment.
define void @dynamic_insertelement_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i32> %a, i32 5, i32 %b
  store <4 x i32> %vecins, <4 x i32> addrspace(1)* %out, align 16
  ret void
}
157
158; GCN-LABEL: {{^}}dynamic_insertelement_v8i32:
159; GCN: v_movreld_b32
160; GCN: buffer_store_dwordx4
161; GCN: buffer_store_dwordx4
; Writes the constant 5 into the lane of %a selected at run time by %b and
; stores the <8 x i32> result to %out with 32-byte alignment.
define void @dynamic_insertelement_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x i32> %a, i32 5, i32 %b
  store <8 x i32> %vecins, <8 x i32> addrspace(1)* %out, align 32
  ret void
}
167
168; GCN-LABEL: {{^}}dynamic_insertelement_v16i32:
169; GCN: v_movreld_b32
170; GCN: buffer_store_dwordx4
171; GCN: buffer_store_dwordx4
172; GCN: buffer_store_dwordx4
173; GCN: buffer_store_dwordx4
; Writes the constant 5 into the lane of %a selected at run time by %b and
; stores the <16 x i32> result to %out with 64-byte alignment.
define void @dynamic_insertelement_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x i32> %a, i32 5, i32 %b
  store <16 x i32> %vecins, <16 x i32> addrspace(1)* %out, align 64
  ret void
}
179
180; GCN-LABEL: {{^}}dynamic_insertelement_v2i16:
; Writes the i16 constant 5 into the lane of %a selected at run time by %b
; and stores the <2 x i16> result to %out with 8-byte alignment. Only the
; function label is checked for this case.
define void @dynamic_insertelement_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i16> %a, i16 5, i32 %b
  store <2 x i16> %vecins, <2 x i16> addrspace(1)* %out, align 8
  ret void
}
186
187; GCN-LABEL: {{^}}dynamic_insertelement_v3i16:
; Writes the i16 constant 5 into the lane of %a selected at run time by %b
; and stores the <3 x i16> result to %out with 8-byte alignment. Only the
; function label is checked for this case.
define void @dynamic_insertelement_v3i16(<3 x i16> addrspace(1)* %out, <3 x i16> %a, i32 %b) nounwind {
  %vecins = insertelement <3 x i16> %a, i16 5, i32 %b
  store <3 x i16> %vecins, <3 x i16> addrspace(1)* %out, align 8
  ret void
}
193
194; GCN-LABEL: {{^}}dynamic_insertelement_v4i16:
195; GCN: buffer_load_ushort v{{[0-9]+}}, off
196; GCN: buffer_load_ushort v{{[0-9]+}}, off
197; GCN: buffer_load_ushort v{{[0-9]+}}, off
198; GCN: buffer_load_ushort v{{[0-9]+}}, off
199
200; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:6
201; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:4
202; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2
203; GCN-DAG: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
204; GCN: buffer_store_short v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
205
206; GCN: buffer_load_ushort
207; GCN: buffer_load_ushort
208; GCN: buffer_load_ushort
209; GCN: buffer_load_ushort
210
211; GCN: buffer_store_short v{{[0-9]+}}, off
212; GCN: buffer_store_short v{{[0-9]+}}, off
213; GCN: buffer_store_short v{{[0-9]+}}, off
214; GCN: buffer_store_short v{{[0-9]+}}, off
; Writes the i16 constant 5 into the lane of %a selected at run time by %b
; and stores the <4 x i16> result to %out with 8-byte alignment.
define void @dynamic_insertelement_v4i16(<4 x i16> addrspace(1)* %out, <4 x i16> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i16> %a, i16 5, i32 %b
  store <4 x i16> %vecins, <4 x i16> addrspace(1)* %out, align 8
  ret void
}
220
221; GCN-LABEL: {{^}}dynamic_insertelement_v2i8:
222; GCN: buffer_load_ubyte v{{[0-9]+}}, off
223; GCN: buffer_load_ubyte v{{[0-9]+}}, off
224
225; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:1
226; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
227
228; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
229
230; GCN: buffer_load_ubyte
231; GCN: buffer_load_ubyte
232
233; GCN: buffer_store_byte v{{[0-9]+}}, off
234; GCN: buffer_store_byte v{{[0-9]+}}, off
; Writes the i8 constant 5 into the lane of %a selected at run time by %b
; and stores the <2 x i8> result to %out with 8-byte alignment.
define void @dynamic_insertelement_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i8> %a, i8 5, i32 %b
  store <2 x i8> %vecins, <2 x i8> addrspace(1)* %out, align 8
  ret void
}
240
241; GCN-LABEL: {{^}}dynamic_insertelement_v3i8:
242; GCN: buffer_load_ubyte v{{[0-9]+}}, off
243; GCN: buffer_load_ubyte v{{[0-9]+}}, off
244; GCN: buffer_load_ubyte v{{[0-9]+}}, off
245
246; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2
247; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:1
248; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
249
250; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
251
252; GCN: buffer_load_ubyte
253; GCN: buffer_load_ubyte
254; GCN: buffer_load_ubyte
255
256; GCN-DAG: buffer_store_byte v{{[0-9]+}}, off
257; GCN-DAG: buffer_store_short v{{[0-9]+}}, off
; Writes the i8 constant 5 into the lane of %a selected at run time by %b
; and stores the <3 x i8> result to %out with 4-byte alignment.
define void @dynamic_insertelement_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <3 x i8> %a, i8 5, i32 %b
  store <3 x i8> %vecins, <3 x i8> addrspace(1)* %out, align 4
  ret void
}
263
264; GCN-LABEL: {{^}}dynamic_insertelement_v4i8:
265; GCN: buffer_load_ubyte v{{[0-9]+}}, off
266; GCN: buffer_load_ubyte v{{[0-9]+}}, off
267; GCN: buffer_load_ubyte v{{[0-9]+}}, off
268; GCN: buffer_load_ubyte v{{[0-9]+}}, off
269
270; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:3
271; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:2
272; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen offset:1
273; GCN-DAG: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
274
275; GCN: buffer_store_byte v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, s{{[0-9]+}} offen{{$}}
276
277; GCN: buffer_load_ubyte
278; GCN: buffer_load_ubyte
279; GCN: buffer_load_ubyte
280; GCN: buffer_load_ubyte
281
282; GCN: buffer_store_byte v{{[0-9]+}}, off
283; GCN: buffer_store_byte v{{[0-9]+}}, off
284; GCN: buffer_store_byte v{{[0-9]+}}, off
285; GCN: buffer_store_byte v{{[0-9]+}}, off
; Writes the i8 constant 5 into the lane of %a selected at run time by %b
; and stores the <4 x i8> result to %out with 4-byte alignment.
define void @dynamic_insertelement_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x i8> %a, i8 5, i32 %b
  store <4 x i8> %vecins, <4 x i8> addrspace(1)* %out, align 4
  ret void
}
291
292; GCN-LABEL: {{^}}dynamic_insertelement_v8i8:
; Writes the i8 constant 5 into the lane of %a selected at run time by %b
; and stores the <8 x i8> result to %out with 8-byte alignment. Only the
; function label is checked for this case.
define void @dynamic_insertelement_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x i8> %a, i8 5, i32 %b
  store <8 x i8> %vecins, <8 x i8> addrspace(1)* %out, align 8
  ret void
}
298
299; GCN-LABEL: {{^}}dynamic_insertelement_v16i8:
; Writes the i8 constant 5 into the lane of %a selected at run time by %b
; and stores the <16 x i8> result to %out with 16-byte alignment. Only the
; function label is checked for this case.
define void @dynamic_insertelement_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> %a, i32 %b) nounwind {
  %vecins = insertelement <16 x i8> %a, i8 5, i32 %b
  store <16 x i8> %vecins, <16 x i8> addrspace(1)* %out, align 16
  ret void
}
305
306; This test requires handling INSERT_SUBREG in SIFixSGPRCopies.  Check that
307; the compiler doesn't crash.
308; GCN-LABEL: {{^}}insert_split_bb:
; Builds a <2 x i32> across several basic blocks: lane 0 is set to %a in the
; entry block, lane 1 is filled in either %if (value loaded from %in, taken
; when %a == 0) or %else (value loaded from the element after %in), and the
; two partial vectors are merged by a phi before being stored to %out.
; The %b argument is not used by the body.
define void @insert_split_bb(<2 x i32> addrspace(1)* %out, i32 addrspace(1)* %in, i32 %a, i32 %b) {
entry:
  ; Lane 0 is written before the branch, so both successors extend the same
  ; partially-defined vector.
  %0 = insertelement <2 x i32> undef, i32 %a, i32 0
  %1 = icmp eq i32 %a, 0
  br i1 %1, label %if, label %else

if:
  %2 = load i32, i32 addrspace(1)* %in
  %3 = insertelement <2 x i32> %0, i32 %2, i32 1
  br label %endif

else:
  %4 = getelementptr i32, i32 addrspace(1)* %in, i32 1
  %5 = load i32, i32 addrspace(1)* %4
  %6 = insertelement <2 x i32> %0, i32 %5, i32 1
  br label %endif

endif:
  ; Merge the two fully-populated vectors and write the result out.
  %7 = phi <2 x i32> [%3, %if], [%6, %else]
  store <2 x i32> %7, <2 x i32> addrspace(1)* %out
  ret void
}
331
332; GCN-LABEL: {{^}}dynamic_insertelement_v2f64:
333; GCN: s_load_dword [[IDX:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, {{0x11|0x44}}{{$}}
334; GCN-DAG: s_lshl_b32 [[SCALEDIDX:s[0-9]+]], [[IDX]], 1{{$}}
335; GCN-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 0{{$}}
336
337; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
338; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
339; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
340; GCN-DAG: v_mov_b32_e32 v{{[0-9]+}}, s{{[0-9]+}}
341
342; GCN: s_mov_b32 m0, [[SCALEDIDX]]
343; GCN: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT0]]
344
345; Increment to next element.
346; FIXME: Should be able to manipulate m0 directly instead of add and
347; copy.
348
349; GCN: s_or_b32 [[IDX1:s[0-9]+]], [[SCALEDIDX]], 1
350; GCN-DAG: v_mov_b32_e32 [[ELT1:v[0-9]+]], 0x40200000
351; GCN-DAG: s_mov_b32 m0, [[IDX1]]
352; GCN: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT1]]
353
354; GCN: buffer_store_dwordx4
355; GCN: s_endpgm
; Writes the double constant 8.0 (high dword 0x40200000, low dword 0) into
; the lane of %a selected at run time by %b and stores the <2 x double>
; result to %out with 16-byte alignment.
define void @dynamic_insertelement_v2f64(<2 x double> addrspace(1)* %out, <2 x double> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x double> %a, double 8.0, i32 %b
  store <2 x double> %vecins, <2 x double> addrspace(1)* %out, align 16
  ret void
}
361
362; FIXME: Inline immediate should be folded into v_movreld_b32.
363; GCN-LABEL: {{^}}dynamic_insertelement_v2i64:
364
365; GCN-DAG: v_mov_b32_e32 [[ELT0:v[0-9]+]], 5{{$}}
366; GCN-DAG: v_mov_b32_e32 [[ELT1:v[0-9]+]], 0{{$}}
367
368; GCN-DAG: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT0]]
369; GCN-DAG: v_movreld_b32_e32 v{{[0-9]+}}, [[ELT1]]
370
371; GCN: buffer_store_dwordx4
372; GCN: s_endpgm
; Writes the i64 constant 5 (low dword 5, high dword 0) into the lane of %a
; selected at run time by %b and stores the <2 x i64> result to %out with
; 8-byte alignment.
define void @dynamic_insertelement_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> %a, i32 %b) nounwind {
  %vecins = insertelement <2 x i64> %a, i64 5, i32 %b
  store <2 x i64> %vecins, <2 x i64> addrspace(1)* %out, align 8
  ret void
}
378
379; GCN-LABEL: {{^}}dynamic_insertelement_v3i64:
; Writes the i64 constant 5 into the lane of %a selected at run time by %b
; and stores the <3 x i64> result to %out with 32-byte alignment. Only the
; function label is checked for this case.
define void @dynamic_insertelement_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> %a, i32 %b) nounwind {
  %vecins = insertelement <3 x i64> %a, i64 5, i32 %b
  store <3 x i64> %vecins, <3 x i64> addrspace(1)* %out, align 32
  ret void
}
385
386; FIXME: Should be able to do without stack access. The used stack
387; space is also 2x what should be required.
388
389; GCN-LABEL: {{^}}dynamic_insertelement_v4f64:
390; GCN: SCRATCH_RSRC_DWORD
391
392; Stack store
393
394; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
395; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
396
397; Write element
398; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
399
400; Stack reload
401; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
402; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
403
404; Store result
405; GCN: buffer_store_dwordx4
406; GCN: buffer_store_dwordx4
407; GCN: s_endpgm
408; GCN: ScratchSize: 64
409
; Writes the double constant 8.0 into the lane of %a selected at run time by
; %b and stores the <4 x double> result to %out with 16-byte alignment.
define void @dynamic_insertelement_v4f64(<4 x double> addrspace(1)* %out, <4 x double> %a, i32 %b) nounwind {
  %vecins = insertelement <4 x double> %a, double 8.0, i32 %b
  store <4 x double> %vecins, <4 x double> addrspace(1)* %out, align 16
  ret void
}
415
416; GCN-LABEL: {{^}}dynamic_insertelement_v8f64:
417; GCN: SCRATCH_RSRC_DWORD
418
419; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
420; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
421; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:32{{$}}
422; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:48{{$}}
423
424; GCN: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
425
426; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
427; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
428; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen offset:16{{$}}
429; GCN-DAG: buffer_load_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}}, {{s[0-9]+}} offen{{$}}
430
431; GCN: buffer_store_dwordx4
432; GCN: buffer_store_dwordx4
433; GCN: buffer_store_dwordx4
434; GCN: buffer_store_dwordx4
435; GCN: s_endpgm
436; GCN: ScratchSize: 128
; Writes the double constant 8.0 into the lane of %a selected at run time by
; %b and stores the <8 x double> result to %out with 16-byte alignment.
; NOTE(review): the stack-store checks above cover offsets 0, 16, 32 and 48,
; but the four stack-reload checks only name "offset:16" and no offset, each
; twice. Presumably the second pair was meant to be offsets 32 and 48 --
; confirm against current llc output before tightening them.
define void @dynamic_insertelement_v8f64(<8 x double> addrspace(1)* %out, <8 x double> %a, i32 %b) nounwind {
  %vecins = insertelement <8 x double> %a, double 8.0, i32 %b
  store <8 x double> %vecins, <8 x double> addrspace(1)* %out, align 16
  ret void
}
442