; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,CI %s
2; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
3; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
4
5; GCN-LABEL: {{^}}i1_func_void:
6; GCN: buffer_load_ubyte v0, off
7; GCN-NEXT: s_waitcnt
8; GCN-NEXT: s_setpc_b64
; Return an i1 loaded through an undef addrspace(1) pointer; the return value
; carries no extension attribute.
define i1 @i1_func_void() #0 {
  %loaded = load i1, i1 addrspace(1)* undef
  ret i1 %loaded
}
13
; FIXME: Missing and? Nothing masks the loaded byte down to bit 0 for the zeroext return.
15; GCN-LABEL: {{^}}i1_zeroext_func_void:
16; GCN: buffer_load_ubyte v0, off
17; GCN-NEXT: s_waitcnt vmcnt(0)
18; GCN-NEXT: s_setpc_b64
; Return an i1 loaded through an undef addrspace(1) pointer, with zeroext
; requested on the return value.
define zeroext i1 @i1_zeroext_func_void() #0 {
  %loaded = load i1, i1 addrspace(1)* undef
  ret i1 %loaded
}
23
24; GCN-LABEL: {{^}}i1_signext_func_void:
25; GCN: buffer_load_ubyte v0, off
26; GCN-NEXT: s_waitcnt vmcnt(0)
27; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}}
28; GCN-NEXT: s_setpc_b64
; Return an i1 loaded through an undef addrspace(1) pointer, with signext
; requested on the return value. The checks above expect a 1-bit signed
; bitfield extract of v0 after the load.
define signext i1 @i1_signext_func_void() #0 {
  %loaded = load i1, i1 addrspace(1)* undef
  ret i1 %loaded
}
33
34; GCN-LABEL: {{^}}i8_func_void:
35; GCN: buffer_load_ubyte v0, off
36; GCN-NEXT: s_waitcnt vmcnt(0)
37; GCN-NEXT: s_setpc_b64
; Return an i8 loaded through an undef addrspace(1) pointer; no extension
; attribute on the return value.
define i8 @i8_func_void() #0 {
  %loaded = load i8, i8 addrspace(1)* undef
  ret i8 %loaded
}
42
43; GCN-LABEL: {{^}}i8_zeroext_func_void:
44; GCN: buffer_load_ubyte v0, off
45; GCN-NEXT: s_waitcnt vmcnt(0)
46; GCN-NEXT: s_setpc_b64
; Return an i8 loaded through an undef addrspace(1) pointer, with zeroext
; requested on the return value.
define zeroext i8 @i8_zeroext_func_void() #0 {
  %loaded = load i8, i8 addrspace(1)* undef
  ret i8 %loaded
}
51
52; GCN-LABEL: {{^}}i8_signext_func_void:
53; GCN: buffer_load_sbyte v0, off
54; GCN-NEXT: s_waitcnt vmcnt(0)
55; GCN-NEXT: s_setpc_b64
; Return an i8 loaded through an undef addrspace(1) pointer, with signext
; requested on the return value. The checks above expect the sign extension
; to be folded into a signed byte load.
define signext i8 @i8_signext_func_void() #0 {
  %loaded = load i8, i8 addrspace(1)* undef
  ret i8 %loaded
}
60
61; GCN-LABEL: {{^}}i16_func_void:
62; GCN: buffer_load_ushort v0, off
63; GCN-NEXT: s_waitcnt vmcnt(0)
64; GCN-NEXT: s_setpc_b64
; Return an i16 loaded through an undef addrspace(1) pointer; no extension
; attribute on the return value.
define i16 @i16_func_void() #0 {
  %loaded = load i16, i16 addrspace(1)* undef
  ret i16 %loaded
}
69
70; GCN-LABEL: {{^}}i16_zeroext_func_void:
71; GCN: buffer_load_ushort v0, off
72; GCN-NEXT: s_waitcnt vmcnt(0)
73; GCN-NEXT: s_setpc_b64
; Return an i16 loaded through an undef addrspace(1) pointer, with zeroext
; requested on the return value.
define zeroext i16 @i16_zeroext_func_void() #0 {
  %loaded = load i16, i16 addrspace(1)* undef
  ret i16 %loaded
}
78
79; GCN-LABEL: {{^}}i16_signext_func_void:
80; GCN: buffer_load_sshort v0, off
81; GCN-NEXT: s_waitcnt vmcnt(0)
82; GCN-NEXT: s_setpc_b64
; Return an i16 loaded through an undef addrspace(1) pointer, with signext
; requested on the return value. The checks above expect the sign extension
; to be folded into a signed short load.
define signext i16 @i16_signext_func_void() #0 {
  %loaded = load i16, i16 addrspace(1)* undef
  ret i16 %loaded
}
87
88; GCN-LABEL: {{^}}i32_func_void:
89; GCN: buffer_load_dword v0, off
90; GCN-NEXT: s_waitcnt vmcnt(0)
91; GCN-NEXT: s_setpc_b64
; Return an i32 loaded through an undef addrspace(1) pointer.
define i32 @i32_func_void() #0 {
  %loaded = load i32, i32 addrspace(1)* undef
  ret i32 %loaded
}
96
97; GCN-LABEL: {{^}}i48_func_void:
98; GCN: buffer_load_dword v0, off
99; GCN-NEXT: buffer_load_ushort v1, off
100; GCN-NEXT: s_waitcnt vmcnt(0)
101; GCN-NEXT: s_setpc_b64
; Return an i48 loaded (align 8) through an undef addrspace(1) pointer. The
; checks above expect the value to be split into a dword load (v0) and an
; unsigned short load (v1).
define i48 @i48_func_void() #0 {
  %loaded = load i48, i48 addrspace(1)* undef, align 8
  ret i48 %loaded
}
106
107; GCN-LABEL: {{^}}i48_zeroext_func_void:
108; GCN: buffer_load_dword v0, off
109; GCN-NEXT: buffer_load_ushort v1, off
110; GCN-NEXT: s_waitcnt vmcnt(0)
111; GCN-NEXT: s_setpc_b64
; Return an i48 loaded (align 8) through an undef addrspace(1) pointer, with
; zeroext requested on the return value.
define zeroext i48 @i48_zeroext_func_void() #0 {
  %loaded = load i48, i48 addrspace(1)* undef, align 8
  ret i48 %loaded
}
116
117; GCN-LABEL: {{^}}i48_signext_func_void:
118; GCN: buffer_load_dword v0, off
119; GCN-NEXT: buffer_load_sshort v1, off
120; GCN-NEXT: s_waitcnt vmcnt(0)
121; GCN-NEXT: s_setpc_b64
; Return an i48 loaded (align 8) through an undef addrspace(1) pointer, with
; signext requested on the return value. The checks above expect the upper
; part (v1) to come from a signed short load.
define signext i48 @i48_signext_func_void() #0 {
  %loaded = load i48, i48 addrspace(1)* undef, align 8
  ret i48 %loaded
}
126
127; GCN-LABEL: {{^}}i63_func_void:
128; GCN: s_waitcnt
129; GCN-NEXT: s_setpc_b64
; Return the incoming i63 argument unchanged; no extension attribute is
; requested on the return value.
define i63 @i63_func_void(i63 %val) #0 {
  ret i63 %val
}
133
134; GCN-LABEL: {{^}}i63_zeroext_func_void:
135; GCN: s_waitcnt
136; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
137; GCN-NEXT: s_setpc_b64
; Return the incoming i63 argument with zeroext requested on the return
; value. The checks above expect v1 to be masked with 0x7fffffff.
define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 {
  ret i63 %val
}
141
142; GCN-LABEL: {{^}}i63_signext_func_void:
143; GCN: s_waitcnt
; CI-NEXT: v_lshl_b64 v[0:1], v[0:1], 1
; CI-NEXT: v_ashr_i64 v[0:1], v[0:1], 1

; GFX89-NEXT: v_lshlrev_b64 v[0:1], 1, v[0:1]
; GFX89-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
149
150; GCN-NEXT: s_setpc_b64
; Return the incoming i63 argument with signext requested on the return
; value. The checks above expect a 64-bit shift left by 1 followed by an
; arithmetic shift right by 1 on v[0:1].
define signext i63 @i63_signext_func_void(i63 %val) #0 {
  ret i63 %val
}
154
155; GCN-LABEL: {{^}}i64_func_void:
156; GCN: buffer_load_dwordx2 v[0:1], off
157; GCN-NEXT: s_waitcnt vmcnt(0)
158; GCN-NEXT: s_setpc_b64
; Return an i64 loaded through an undef addrspace(1) pointer.
define i64 @i64_func_void() #0 {
  %loaded = load i64, i64 addrspace(1)* undef
  ret i64 %loaded
}
163
164; GCN-LABEL: {{^}}i65_func_void:
165; GCN-DAG: buffer_load_dwordx2 v[0:1], off
166; GCN-DAG: buffer_load_ubyte v2, off
167; GCN: s_waitcnt vmcnt(0)
168; GCN-NEXT: s_setpc_b64
; Return an i65 loaded through an undef addrspace(1) pointer. The checks
; above expect a two-dword load into v[0:1] plus a byte load into v2.
define i65 @i65_func_void() #0 {
  %loaded = load i65, i65 addrspace(1)* undef
  ret i65 %loaded
}
173
174; GCN-LABEL: {{^}}f32_func_void:
175; GCN: buffer_load_dword v0, off, s[4:7], 0
176; GCN-NEXT: s_waitcnt vmcnt(0)
177; GCN-NEXT: s_setpc_b64
; Return a float loaded through an undef addrspace(1) pointer.
define float @f32_func_void() #0 {
  %loaded = load float, float addrspace(1)* undef
  ret float %loaded
}
182
183; GCN-LABEL: {{^}}f64_func_void:
184; GCN: buffer_load_dwordx2 v[0:1], off
185; GCN-NEXT: s_waitcnt vmcnt(0)
186; GCN-NEXT: s_setpc_b64
; Return a double loaded through an undef addrspace(1) pointer.
define double @f64_func_void() #0 {
  %loaded = load double, double addrspace(1)* undef
  ret double %loaded
}
191
192; GCN-LABEL: {{^}}v2f64_func_void:
193; GCN: buffer_load_dwordx4 v[0:3], off
194; GCN-NEXT: s_waitcnt vmcnt(0)
195; GCN-NEXT: s_setpc_b64
; Return a <2 x double> loaded through an undef addrspace(1) pointer.
define <2 x double> @v2f64_func_void() #0 {
  %loaded = load <2 x double>, <2 x double> addrspace(1)* undef
  ret <2 x double> %loaded
}
200
201; GCN-LABEL: {{^}}v2i32_func_void:
202; GCN: buffer_load_dwordx2 v[0:1], off
203; GCN-NEXT: s_waitcnt vmcnt(0)
204; GCN-NEXT: s_setpc_b64
; Return a <2 x i32> loaded through an undef addrspace(1) pointer.
define <2 x i32> @v2i32_func_void() #0 {
  %loaded = load <2 x i32>, <2 x i32> addrspace(1)* undef
  ret <2 x i32> %loaded
}
209
210; GCN-LABEL: {{^}}v3i32_func_void:
211; GCN: buffer_load_dwordx3 v[0:2], off
212; GCN-NEXT: s_waitcnt vmcnt(0)
213; GCN-NEXT: s_setpc_b64
; Return a <3 x i32> loaded through an undef addrspace(1) pointer.
define <3 x i32> @v3i32_func_void() #0 {
  %loaded = load <3 x i32>, <3 x i32> addrspace(1)* undef
  ret <3 x i32> %loaded
}
218
219; GCN-LABEL: {{^}}v4i32_func_void:
220; GCN: buffer_load_dwordx4 v[0:3], off
221; GCN-NEXT: s_waitcnt vmcnt(0)
222; GCN-NEXT: s_setpc_b64
; Return a <4 x i32> loaded through an undef addrspace(1) pointer.
define <4 x i32> @v4i32_func_void() #0 {
  %loaded = load <4 x i32>, <4 x i32> addrspace(1)* undef
  ret <4 x i32> %loaded
}
227
228; GCN-LABEL: {{^}}v5i32_func_void:
229; GCN-DAG: buffer_load_dword v4, off
230; GCN-DAG: buffer_load_dwordx4 v[0:3], off
231; GCN: s_waitcnt vmcnt(0)
232; GCN-NEXT: s_setpc_b64
; Return a <5 x i32> obtained by a volatile load through an undef
; addrspace(1) pointer. The checks above expect a four-dword load into
; v[0:3] and a single-dword load into v4.
define <5 x i32> @v5i32_func_void() #0 {
  %loaded = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef
  ret <5 x i32> %loaded
}
237
238; GCN-LABEL: {{^}}v8i32_func_void:
239; GCN-DAG: buffer_load_dwordx4 v[0:3], off
240; GCN-DAG: buffer_load_dwordx4 v[4:7], off
241; GCN: s_waitcnt vmcnt(0)
242; GCN-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <8 x i32> loaded through that pointer.
define <8 x i32> @v8i32_func_void() #0 {
  %src = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
  %loaded = load <8 x i32>, <8 x i32> addrspace(1)* %src
  ret <8 x i32> %loaded
}
248
249; GCN-LABEL: {{^}}v16i32_func_void:
250; GCN-DAG: buffer_load_dwordx4 v[0:3], off
251; GCN-DAG: buffer_load_dwordx4 v[4:7], off
252; GCN-DAG: buffer_load_dwordx4 v[8:11], off
253; GCN-DAG: buffer_load_dwordx4 v[12:15], off
254; GCN: s_waitcnt vmcnt(0)
255; GCN-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <16 x i32> loaded through that pointer.
define <16 x i32> @v16i32_func_void() #0 {
  %src = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
  %loaded = load <16 x i32>, <16 x i32> addrspace(1)* %src
  ret <16 x i32> %loaded
}
261
262; GCN-LABEL: {{^}}v32i32_func_void:
263; GCN-DAG: buffer_load_dwordx4 v[0:3], off
264; GCN-DAG: buffer_load_dwordx4 v[4:7], off
265; GCN-DAG: buffer_load_dwordx4 v[8:11], off
266; GCN-DAG: buffer_load_dwordx4 v[12:15], off
267; GCN-DAG: buffer_load_dwordx4 v[16:19], off
268; GCN-DAG: buffer_load_dwordx4 v[20:23], off
269; GCN-DAG: buffer_load_dwordx4 v[24:27], off
270; GCN-DAG: buffer_load_dwordx4 v[28:31], off
271; GCN: s_waitcnt vmcnt(0)
272; GCN-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <32 x i32> loaded through that pointer. The checks above
; expect the result to occupy v[0:31].
define <32 x i32> @v32i32_func_void() #0 {
  %src = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
  %loaded = load <32 x i32>, <32 x i32> addrspace(1)* %src
  ret <32 x i32> %loaded
}
278
279; GCN-LABEL: {{^}}v2i64_func_void:
280; GCN: buffer_load_dwordx4 v[0:3], off
281; GCN-NEXT: s_waitcnt vmcnt(0)
282; GCN-NEXT: s_setpc_b64
; Return a <2 x i64> loaded through an undef addrspace(1) pointer.
define <2 x i64> @v2i64_func_void() #0 {
  %loaded = load <2 x i64>, <2 x i64> addrspace(1)* undef
  ret <2 x i64> %loaded
}
287
288; GCN-LABEL: {{^}}v3i64_func_void:
289; GCN-DAG: buffer_load_dwordx4 v[0:3], off
290; GCN-DAG: buffer_load_dwordx2 v[4:5], off
291; GCN: s_waitcnt vmcnt(0)
292; GCN-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <3 x i64> loaded through that pointer.
define <3 x i64> @v3i64_func_void() #0 {
  %src = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
  %loaded = load <3 x i64>, <3 x i64> addrspace(1)* %src
  ret <3 x i64> %loaded
}
298
299; GCN-LABEL: {{^}}v4i64_func_void:
300; GCN: buffer_load_dwordx4 v[0:3], off
301; GCN: buffer_load_dwordx4 v[4:7], off
302; GCN-NEXT: s_waitcnt vmcnt(0)
303; GCN-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <4 x i64> loaded through that pointer.
define <4 x i64> @v4i64_func_void() #0 {
  %src = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
  %loaded = load <4 x i64>, <4 x i64> addrspace(1)* %src
  ret <4 x i64> %loaded
}
309
310; GCN-LABEL: {{^}}v5i64_func_void:
311; GCN-DAG: buffer_load_dwordx4 v[0:3], off
312; GCN-DAG: buffer_load_dwordx4 v[4:7], off
313; GCN-DAG: buffer_load_dwordx4 v[8:11], off
314; GCN: s_waitcnt vmcnt(0)
315; GCN-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <5 x i64> loaded through that pointer.
define <5 x i64> @v5i64_func_void() #0 {
  %src = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
  %loaded = load <5 x i64>, <5 x i64> addrspace(1)* %src
  ret <5 x i64> %loaded
}
321
322; GCN-LABEL: {{^}}v8i64_func_void:
323; GCN-DAG: buffer_load_dwordx4 v[0:3], off
324; GCN-DAG: buffer_load_dwordx4 v[4:7], off
325; GCN-DAG: buffer_load_dwordx4 v[8:11], off
326; GCN-DAG: buffer_load_dwordx4 v[12:15], off
327; GCN: s_waitcnt vmcnt(0)
328; GCN-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <8 x i64> loaded through that pointer.
define <8 x i64> @v8i64_func_void() #0 {
  %src = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
  %loaded = load <8 x i64>, <8 x i64> addrspace(1)* %src
  ret <8 x i64> %loaded
}
334
335; GCN-LABEL: {{^}}v16i64_func_void:
336; GCN-DAG: buffer_load_dwordx4 v[0:3], off
337; GCN-DAG: buffer_load_dwordx4 v[4:7], off
338; GCN-DAG: buffer_load_dwordx4 v[8:11], off
339; GCN-DAG: buffer_load_dwordx4 v[12:15], off
340; GCN-DAG: buffer_load_dwordx4 v[16:19], off
341; GCN-DAG: buffer_load_dwordx4 v[20:23], off
342; GCN-DAG: buffer_load_dwordx4 v[24:27], off
343; GCN-DAG: buffer_load_dwordx4 v[28:31], off
344; GCN: s_waitcnt vmcnt(0)
345; GCN-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <16 x i64> loaded through that pointer. The checks above
; expect the result to occupy v[0:31].
define <16 x i64> @v16i64_func_void() #0 {
  %src = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
  %loaded = load <16 x i64>, <16 x i64> addrspace(1)* %src
  ret <16 x i64> %loaded
}
351
352; GCN-LABEL: {{^}}v2i16_func_void:
353; GFX9: buffer_load_dword v0, off
354; GFX9-NEXT: s_waitcnt vmcnt(0)
355; GFX9-NEXT: s_setpc_b64
; Return a <2 x i16> loaded through an undef addrspace(1) pointer.
define <2 x i16> @v2i16_func_void() #0 {
  %loaded = load <2 x i16>, <2 x i16> addrspace(1)* undef
  ret <2 x i16> %loaded
}
360
361; GCN-LABEL: {{^}}v3i16_func_void:
362; GFX9: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off
363; GFX9-NEXT: s_waitcnt vmcnt(0)
364; GFX9-NEXT: s_setpc_b64
; Return a <3 x i16> loaded through an undef addrspace(1) pointer.
define <3 x i16> @v3i16_func_void() #0 {
  %loaded = load <3 x i16>, <3 x i16> addrspace(1)* undef
  ret <3 x i16> %loaded
}
369
370; GCN-LABEL: {{^}}v4i16_func_void:
371; GFX9: buffer_load_dwordx2 v[0:1], off
372; GFX9-NEXT: s_waitcnt vmcnt(0)
373; GFX9-NEXT: s_setpc_b64
; Return a <4 x i16> loaded through an undef addrspace(1) pointer.
define <4 x i16> @v4i16_func_void() #0 {
  %loaded = load <4 x i16>, <4 x i16> addrspace(1)* undef
  ret <4 x i16> %loaded
}
378
379; GCN-LABEL: {{^}}v4f16_func_void:
380; GFX9: buffer_load_dwordx2 v[0:1], off
381; GFX9-NEXT: s_waitcnt vmcnt(0)
382; GFX9-NEXT: s_setpc_b64
; Return a <4 x half> loaded through an undef addrspace(1) pointer.
define <4 x half> @v4f16_func_void() #0 {
  %loaded = load <4 x half>, <4 x half> addrspace(1)* undef
  ret <4 x half> %loaded
}
387
388; FIXME: Mixing buffer and global
389; FIXME: Should not scalarize
390; GCN-LABEL: {{^}}v5i16_func_void:
391; GFX9: buffer_load_dwordx4 v[0:3]
392; GFX9-NEXT: s_waitcnt
393; GFX9-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <5 x i16> loaded through that pointer.
define <5 x i16> @v5i16_func_void() #0 {
  %src = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
  %loaded = load <5 x i16>, <5 x i16> addrspace(1)* %src
  ret <5 x i16> %loaded
}
399
400; GCN-LABEL: {{^}}v8i16_func_void:
401; GFX9-DAG: buffer_load_dwordx4 v[0:3], off
402; GFX9: s_waitcnt vmcnt(0)
403; GFX9-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <8 x i16> loaded through that pointer.
define <8 x i16> @v8i16_func_void() #0 {
  %src = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
  %loaded = load <8 x i16>, <8 x i16> addrspace(1)* %src
  ret <8 x i16> %loaded
}
409
410; GCN-LABEL: {{^}}v16i16_func_void:
411; GFX9: buffer_load_dwordx4 v[0:3], off
412; GFX9: buffer_load_dwordx4 v[4:7], off
413; GFX9: s_waitcnt vmcnt(0)
414; GFX9-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <16 x i16> loaded through that pointer.
define <16 x i16> @v16i16_func_void() #0 {
  %src = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
  %loaded = load <16 x i16>, <16 x i16> addrspace(1)* %src
  ret <16 x i16> %loaded
}
420
421; FIXME: Should pack
422; GCN-LABEL: {{^}}v16i8_func_void:
423; GCN-DAG: v12
424; GCN-DAG: v13
425; GCN-DAG: v14
426; GCN-DAG: v15
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <16 x i8> loaded through that pointer. The checks above
; only require that v12 through v15 appear in the output.
define <16 x i8> @v16i8_func_void() #0 {
  %src = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
  %loaded = load <16 x i8>, <16 x i8> addrspace(1)* %src
  ret <16 x i8> %loaded
}
432
433; FIXME: Should pack
434; GCN-LABEL: {{^}}v4i8_func_void:
435; GCN: buffer_load_dword v0
436; GCN-DAG: v_lshrrev_b32_e32 v1, 8, v0
437; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0
438; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0
439; GCN: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <4 x i8> loaded through that pointer. The checks above
; expect one dword load into v0, with v1, v2 and v3 produced by shifting v0
; right by 8, 16 and 24 bits.
define <4 x i8> @v4i8_func_void() #0 {
  %ptr = load volatile <4 x i8> addrspace(1)*, <4 x i8> addrspace(1)* addrspace(4)* undef
  %val = load <4 x i8>, <4 x i8> addrspace(1)* %ptr
  ret <4 x i8> %val
}
445
446; GCN-LABEL: {{^}}struct_i8_i32_func_void:
447; GCN-DAG: buffer_load_dword v1
448; GCN-DAG: buffer_load_ubyte v0
449; GCN: s_waitcnt vmcnt(0)
450; GCN-NEXT: s_setpc_b64
; Return an { i8, i32 } aggregate loaded through an undef addrspace(1)
; pointer. The checks above expect the i8 field in v0 and the i32 field in v1.
define {i8, i32} @struct_i8_i32_func_void() #0 {
  %agg = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
  ret { i8, i32 } %agg
}
455
456; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32:
457; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]]
458; GCN: buffer_load_dword [[VAL1:v[0-9]+]]
459; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], 0 offen{{$}}
460; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], 0 offen offset:4{{$}}
; Fill an { i8, i32 } result through an explicit sret pointer in
; addrspace(5). Both fields are volatile loads through undef addrspace(1)
; pointers; the checks above expect the stores to address the result via v0,
; at offsets 0 and 4.
define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret({ i8, i32 }) %arg0) #0 {
  %byte = load volatile i8, i8 addrspace(1)* undef
  %word = load volatile i32, i32 addrspace(1)* undef
  %field0.ptr = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
  %field1.ptr = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
  store i8 %byte, i8 addrspace(5)* %field0.ptr
  store i32 %word, i32 addrspace(5)* %field1.ptr
  ret void
}
470
471; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
472; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
473; AssertZext inserted. Not using it introduces the spills.
474
475; GCN-LABEL: {{^}}v33i32_func_void:
476; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
477; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:4{{$}}
478; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:8{{$}}
479; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:12{{$}}
480; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:16{{$}}
481; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:20{{$}}
482; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:24{{$}}
483; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:28{{$}}
484; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:32{{$}}
485; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:36{{$}}
486; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:40{{$}}
487; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:44{{$}}
488; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:48{{$}}
489; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:52{{$}}
490; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:56{{$}}
491; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:60{{$}}
492; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:64{{$}}
493; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:68{{$}}
494; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:72{{$}}
495; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:76{{$}}
496; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:80{{$}}
497; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:84{{$}}
498; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:88{{$}}
499; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:92{{$}}
500; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:96{{$}}
501; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:100{{$}}
502; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:104{{$}}
503; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:108{{$}}
504; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:112{{$}}
505; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:116{{$}}
506; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:120{{$}}
507; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:124{{$}}
508; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
509; GFX9: s_waitcnt vmcnt(0)
510; GFX9-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the <33 x i32> loaded through that pointer. The checks above
; expect the result to be written with dword stores addressed through v0, at
; offsets 0 through 128.
define <33 x i32> @v33i32_func_void() #0 {
  %src = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
  %loaded = load <33 x i32>, <33 x i32> addrspace(1)* %src
  ret <33 x i32> %loaded
}
516
517; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
518; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
519; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:4{{$}}
520; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:8{{$}}
521; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:12{{$}}
522; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:16{{$}}
523; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:20{{$}}
524; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:24{{$}}
525; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:28{{$}}
526; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:32{{$}}
527; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:36{{$}}
528; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:40{{$}}
529; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:44{{$}}
530; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:48{{$}}
531; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:52{{$}}
532; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:56{{$}}
533; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:60{{$}}
534; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:64{{$}}
535; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:68{{$}}
536; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:72{{$}}
537; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:76{{$}}
538; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:80{{$}}
539; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:84{{$}}
540; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:88{{$}}
541; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:92{{$}}
542; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:96{{$}}
543; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:100{{$}}
544; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:104{{$}}
545; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:108{{$}}
546; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:112{{$}}
547; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:116{{$}}
548; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:120{{$}}
549; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:124{{$}}
550; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
551; GFX9: s_waitcnt vmcnt(0)
552; GFX9-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the { <32 x i32>, i32 } aggregate loaded through that pointer.
; The checks above expect the result to be written with dword stores
; addressed through v0, at offsets 0 through 128.
define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
  %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
  %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
  ret { <32 x i32>, i32 } %val
}
558
559; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
560; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
561; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
562; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:132{{$}}
563; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:136{{$}}
564; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:140{{$}}
565; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:144{{$}}
566; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:148{{$}}
567; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:152{{$}}
568; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:156{{$}}
569; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:160{{$}}
570; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:164{{$}}
571; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:168{{$}}
572; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:172{{$}}
573; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:176{{$}}
574; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:180{{$}}
575; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:184{{$}}
576; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:188{{$}}
577; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:192{{$}}
578; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:196{{$}}
579; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:200{{$}}
580; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:204{{$}}
581; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:208{{$}}
582; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:212{{$}}
583; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:216{{$}}
584; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:220{{$}}
585; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:224{{$}}
586; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:228{{$}}
587; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:232{{$}}
588; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:236{{$}}
589; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:240{{$}}
590; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:244{{$}}
591; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:248{{$}}
592; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:252{{$}}
593; GFX9: s_waitcnt vmcnt(0)
594; GFX9-NEXT: s_setpc_b64
; Volatile-load an addrspace(1) pointer from an undef addrspace(4) location,
; then return the { i32, <32 x i32> } aggregate loaded through that pointer.
; The checks above expect dword stores addressed through v0: one at offset 0
; and the rest at offsets 128 through 252.
define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
  %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
  %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
  ret { i32, <32 x i32> } %val
}
600
601; Make sure the last struct component is returned in v3, not v4.
602; GCN-LABEL: {{^}}v3i32_struct_func_void_wasted_reg:
603; GCN: ds_read_b32 v0,
604; GCN: ds_read_b32 v1,
605; GCN: ds_read_b32 v2,
606; GCN: ds_read_b32 v3,
; Build a { <3 x i32>, i32 } from four volatile i32 loads through undef
; addrspace(3) pointers and return it. The checks above expect the four
; values in v0, v1, v2 and v3.
define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
  ; The volatile loads must stay in this order.
  %elt0 = load volatile i32, i32 addrspace(3)* undef
  %elt1 = load volatile i32, i32 addrspace(3)* undef
  %elt2 = load volatile i32, i32 addrspace(3)* undef
  %tail = load volatile i32, i32 addrspace(3)* undef

  ; Assemble the three-element vector, then the enclosing struct.
  %vec.0 = insertelement <3 x i32> undef, i32 %elt0, i32 0
  %vec.1 = insertelement <3 x i32> %vec.0, i32 %elt1, i32 1
  %vec.2 = insertelement <3 x i32> %vec.1, i32 %elt2, i32 2
  %agg.0 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %vec.2, 0
  %agg.1 = insertvalue { <3 x i32>, i32 } %agg.0, i32 %tail, 1
  ret { <3 x i32>, i32 } %agg.1
}
620
621; GCN-LABEL: {{^}}v3f32_struct_func_void_wasted_reg:
622; GCN: ds_read_b32 v0,
623; GCN: ds_read_b32 v1,
624; GCN: ds_read_b32 v2,
625; GCN: ds_read_b32 v3,
; Build a { <3 x float>, i32 } from three volatile float loads and one
; volatile i32 load, all through undef addrspace(3) pointers, and return it.
; The checks above expect the four values in v0, v1, v2 and v3.
define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
  ; The volatile loads must stay in this order.
  %elt0 = load volatile float, float addrspace(3)* undef
  %elt1 = load volatile float, float addrspace(3)* undef
  %elt2 = load volatile float, float addrspace(3)* undef
  %tail = load volatile i32, i32 addrspace(3)* undef

  ; Assemble the three-element vector, then the enclosing struct.
  %vec.0 = insertelement <3 x float> undef, float %elt0, i32 0
  %vec.1 = insertelement <3 x float> %vec.0, float %elt1, i32 1
  %vec.2 = insertelement <3 x float> %vec.1, float %elt2, i32 2
  %agg.0 = insertvalue { <3 x float>, i32 } undef, <3 x float> %vec.2, 0
  %agg.1 = insertvalue { <3 x float>, i32 } %agg.0, i32 %tail, 1
  ret { <3 x float>, i32 } %agg.1
}
639
640; GCN-LABEL: {{^}}void_func_sret_max_known_zero_bits:
641; GCN: v_lshrrev_b32_e32 [[LSHR16:v[0-9]+]], 16, v0
642; GCN: ds_write_b32 {{v[0-9]+}}, [[LSHR16]]
643
644; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0
645; GCN: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
646; GCN-NEXT: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
; Convert the addrspace(5) sret pointer to an i32, shift it right by 16, 17
; and 18 bits, and write each result with a volatile store through an undef
; addrspace(3) pointer. The checks above expect a real shift only for the
; 16-bit case; the other two stores are expected to write a constant 0.
define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret(i8) %arg0) #0 {
  %addr = ptrtoint i8 addrspace(5)* %arg0 to i32

  %shr16 = lshr i32 %addr, 16
  %shr17 = lshr i32 %addr, 17
  %shr18 = lshr i32 %addr, 18

  ; The volatile stores must stay in this order.
  store volatile i32 %shr16, i32 addrspace(3)* undef
  store volatile i32 %shr17, i32 addrspace(3)* undef
  store volatile i32 %shr18, i32 addrspace(3)* undef
  ret void
}
659
; Attribute group #0, referenced by every function definition above.
attributes #0 = { nounwind }
661