1; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck  -enable-var-scope -check-prefixes=GCN,CI %s
2; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
3; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
4
5; GCN-LABEL: {{^}}i1_func_void:
6; GCN: buffer_load_ubyte v0, off
7; GCN-NEXT: s_setpc_b64
; Returns an i1 loaded from an undef addrspace(1) pointer. The return value
; carries no extension attribute.
8define i1 @i1_func_void() #0 {
9  %val = load i1, i1 addrspace(1)* undef
10  ret i1 %val
11}
12
13; FIXME: Missing an 'and' instruction to mask the returned i1?
14; GCN-LABEL: {{^}}i1_zeroext_func_void:
15; GCN: buffer_load_ubyte v0, off
16; GCN-NEXT: s_setpc_b64
; Same load as the plain i1 case, but the return value is marked zeroext.
17define zeroext i1 @i1_zeroext_func_void() #0 {
18  %val = load i1, i1 addrspace(1)* undef
19  ret i1 %val
20}
21
22; GCN-LABEL: {{^}}i1_signext_func_void:
23; GCN: buffer_load_ubyte v0, off
24; GCN-NEXT: s_waitcnt vmcnt(0)
25; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}}
26; GCN-NEXT: s_setpc_b64
; Returns an i1 loaded from an undef addrspace(1) pointer with the return
; value marked signext.
27define signext i1 @i1_signext_func_void() #0 {
28  %val = load i1, i1 addrspace(1)* undef
29  ret i1 %val
30}
31
32; GCN-LABEL: {{^}}i8_func_void:
33; GCN: buffer_load_ubyte v0, off
34; GCN-NEXT: s_setpc_b64
; Returns an i8 loaded from an undef addrspace(1) pointer, with no extension
; attribute on the return value.
35define i8 @i8_func_void() #0 {
36  %val = load i8, i8 addrspace(1)* undef
37  ret i8 %val
38}
39
40; GCN-LABEL: {{^}}i8_zeroext_func_void:
41; GCN: buffer_load_ubyte v0, off
42; GCN-NEXT: s_setpc_b64
; Returns an i8 loaded from an undef addrspace(1) pointer; return is zeroext.
43define zeroext i8 @i8_zeroext_func_void() #0 {
44  %val = load i8, i8 addrspace(1)* undef
45  ret i8 %val
46}
47
48; GCN-LABEL: {{^}}i8_signext_func_void:
49; GCN: buffer_load_sbyte v0, off
50; GCN-NEXT: s_setpc_b64
; Returns an i8 loaded from an undef addrspace(1) pointer; return is signext.
51define signext i8 @i8_signext_func_void() #0 {
52  %val = load i8, i8 addrspace(1)* undef
53  ret i8 %val
54}
55
56; GCN-LABEL: {{^}}i16_func_void:
57; GCN: buffer_load_ushort v0, off
58; GCN-NEXT: s_setpc_b64
; Returns an i16 loaded from an undef addrspace(1) pointer, with no extension
; attribute on the return value.
59define i16 @i16_func_void() #0 {
60  %val = load i16, i16 addrspace(1)* undef
61  ret i16 %val
62}
63
64; GCN-LABEL: {{^}}i16_zeroext_func_void:
65; GCN: buffer_load_ushort v0, off
66; GCN-NEXT: s_setpc_b64
; Returns an i16 loaded from an undef addrspace(1) pointer; return is zeroext.
67define zeroext i16 @i16_zeroext_func_void() #0 {
68  %val = load i16, i16 addrspace(1)* undef
69  ret i16 %val
70}
71
72; GCN-LABEL: {{^}}i16_signext_func_void:
73; GCN: buffer_load_sshort v0, off
74; GCN-NEXT: s_setpc_b64
; Returns an i16 loaded from an undef addrspace(1) pointer; return is signext.
75define signext i16 @i16_signext_func_void() #0 {
76  %val = load i16, i16 addrspace(1)* undef
77  ret i16 %val
78}
79
80; GCN-LABEL: {{^}}i32_func_void:
81; GCN: buffer_load_dword v0, off
82; GCN-NEXT: s_setpc_b64
; Returns an i32 loaded from an undef addrspace(1) pointer.
83define i32 @i32_func_void() #0 {
84  %val = load i32, i32 addrspace(1)* undef
85  ret i32 %val
86}
87
88; GCN-LABEL: {{^}}i48_func_void:
89; GCN: buffer_load_dword v0, off
90; GCN-NEXT: buffer_load_ushort v1, off
91; GCN-NEXT: s_setpc_b64
; Returns an i48 loaded (with explicit 8-byte alignment) from an undef
; addrspace(1) pointer; no extension attribute on the return value.
92define i48 @i48_func_void() #0 {
93  %val = load i48, i48 addrspace(1)* undef, align 8
94  ret i48 %val
95}
96
97; GCN-LABEL: {{^}}i48_zeroext_func_void:
98; GCN: buffer_load_dword v0, off
99; GCN-NEXT: buffer_load_ushort v1, off
100; GCN-NEXT: s_setpc_b64
; Returns an i48 loaded (align 8) from an undef addrspace(1) pointer; return
; is zeroext.
101define zeroext i48 @i48_zeroext_func_void() #0 {
102  %val = load i48, i48 addrspace(1)* undef, align 8
103  ret i48 %val
104}
105
106; GCN-LABEL: {{^}}i48_signext_func_void:
107; GCN: buffer_load_dword v0, off
108; GCN-NEXT: buffer_load_sshort v1, off
109; GCN-NEXT: s_setpc_b64
; Returns an i48 loaded (align 8) from an undef addrspace(1) pointer; return
; is signext.
110define signext i48 @i48_signext_func_void() #0 {
111  %val = load i48, i48 addrspace(1)* undef, align 8
112  ret i48 %val
113}
114
115; GCN-LABEL: {{^}}i63_func_void:
; Returns its i63 argument unchanged. Unlike the load-based cases, the value
; comes from a parameter; only the function label is verified for this one.
116define i63 @i63_func_void(i63 %val) #0 {
117  ret i63 %val
118}
119
120; GCN-LABEL: {{^}}i63_zeroext_func_void:
121; GCN: s_waitcnt
122; GCN-NEXT: v_and_b32_e32 v1, 0x7fffffff, v1
123; GCN-NEXT: s_setpc_b64
; Returns its i63 argument with the return value marked zeroext.
124define zeroext i63 @i63_zeroext_func_void(i63 %val) #0 {
125  ret i63 %val
126}
127
128; GCN-LABEL: {{^}}i63_signext_func_void:
129; GCN: s_waitcnt
130; CI-NEXT:	v_lshl_b64 v[0:1], v[0:1], 1
131; CI-NEXT: v_ashr_i64 v[0:1], v[0:1], 1
132
133; GFX89-NEXT:	v_lshlrev_b64 v[0:1], 1, v[0:1]
134; GFX89-NEXT: v_ashrrev_i64 v[0:1], 1, v[0:1]
135
136; GCN-NEXT: s_setpc_b64
; Returns its i63 argument with the return value marked signext.
137define signext i63 @i63_signext_func_void(i63 %val) #0 {
138  ret i63 %val
139}
140
141; GCN-LABEL: {{^}}i64_func_void:
142; GCN: buffer_load_dwordx2 v[0:1], off
143; GCN-NEXT: s_setpc_b64
; Returns an i64 loaded from an undef addrspace(1) pointer.
144define i64 @i64_func_void() #0 {
145  %val = load i64, i64 addrspace(1)* undef
146  ret i64 %val
147}
148
149; GCN-LABEL: {{^}}i65_func_void:
150; GCN-DAG: buffer_load_dwordx2 v[0:1], off
151; GCN-DAG: buffer_load_ubyte v2, off
152; GCN-NEXT: s_setpc_b64
; Returns an i65 (one bit wider than 64) loaded from an undef addrspace(1)
; pointer.
153define i65 @i65_func_void() #0 {
154  %val = load i65, i65 addrspace(1)* undef
155  ret i65 %val
156}
157
158; GCN-LABEL: {{^}}f32_func_void:
159; GCN: buffer_load_dword v0, off, s[4:7], 0
160; GCN-NEXT: s_setpc_b64
; Returns a float loaded from an undef addrspace(1) pointer.
161define float @f32_func_void() #0 {
162  %val = load float, float addrspace(1)* undef
163  ret float %val
164}
165
166; GCN-LABEL: {{^}}f64_func_void:
167; GCN: buffer_load_dwordx2 v[0:1], off
168; GCN-NEXT: s_setpc_b64
; Returns a double loaded from an undef addrspace(1) pointer.
169define double @f64_func_void() #0 {
170  %val = load double, double addrspace(1)* undef
171  ret double %val
172}
173
174; GCN-LABEL: {{^}}v2f64_func_void:
175; GCN: buffer_load_dwordx4 v[0:3], off
176; GCN-NEXT: s_setpc_b64
; Returns a <2 x double> loaded from an undef addrspace(1) pointer.
177define <2 x double> @v2f64_func_void() #0 {
178  %val = load <2 x double>, <2 x double> addrspace(1)* undef
179  ret <2 x double> %val
180}
181
182; GCN-LABEL: {{^}}v2i32_func_void:
183; GCN: buffer_load_dwordx2 v[0:1], off
184; GCN-NEXT: s_setpc_b64
; Returns a <2 x i32> loaded from an undef addrspace(1) pointer.
185define <2 x i32> @v2i32_func_void() #0 {
186  %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
187  ret <2 x i32> %val
188}
189
190; GCN-LABEL: {{^}}v3i32_func_void:
191; GCN: buffer_load_dwordx3 v[0:2], off
192; GCN-NEXT: s_setpc_b64
; Returns a <3 x i32> loaded from an undef addrspace(1) pointer.
193define <3 x i32> @v3i32_func_void() #0 {
194  %val = load <3 x i32>, <3 x i32> addrspace(1)* undef
195  ret <3 x i32> %val
196}
197
198; GCN-LABEL: {{^}}v4i32_func_void:
199; GCN: buffer_load_dwordx4 v[0:3], off
200; GCN-NEXT: s_setpc_b64
; Returns a <4 x i32> loaded from an undef addrspace(1) pointer.
201define <4 x i32> @v4i32_func_void() #0 {
202  %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
203  ret <4 x i32> %val
204}
205
206; GCN-LABEL: {{^}}v5i32_func_void:
207; GCN-DAG: buffer_load_dword v4, off
208; GCN-DAG: buffer_load_dwordx4 v[0:3], off
209; GCN-NEXT: s_setpc_b64
; Returns a <5 x i32> loaded from an undef addrspace(1) pointer. Note that the
; value load itself is volatile here, unlike the smaller vector cases.
210define <5 x i32> @v5i32_func_void() #0 {
211  %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef
212  ret <5 x i32> %val
213}
214
215; GCN-LABEL: {{^}}v8i32_func_void:
216; GCN-DAG: buffer_load_dwordx4 v[0:3], off
217; GCN-DAG: buffer_load_dwordx4 v[4:7], off
218; GCN-NEXT: s_setpc_b64
; Returns a <8 x i32> loaded through an addrspace(1) pointer that is itself
; fetched with a volatile load from an undef addrspace(4) pointer.
219define <8 x i32> @v8i32_func_void() #0 {
220  %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
221  %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
222  ret <8 x i32> %val
223}
224
225; GCN-LABEL: {{^}}v16i32_func_void:
226; GCN-DAG: buffer_load_dwordx4 v[0:3], off
227; GCN-DAG: buffer_load_dwordx4 v[4:7], off
228; GCN-DAG: buffer_load_dwordx4 v[8:11], off
229; GCN-DAG: buffer_load_dwordx4 v[12:15], off
230; GCN-NEXT: s_setpc_b64
; Returns a <16 x i32> loaded through an addrspace(1) pointer that is itself
; fetched with a volatile load from an undef addrspace(4) pointer.
231define <16 x i32> @v16i32_func_void() #0 {
232  %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
233  %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
234  ret <16 x i32> %val
235}
236
237; GCN-LABEL: {{^}}v32i32_func_void:
238; GCN-DAG: buffer_load_dwordx4 v[0:3], off
239; GCN-DAG: buffer_load_dwordx4 v[4:7], off
240; GCN-DAG: buffer_load_dwordx4 v[8:11], off
241; GCN-DAG: buffer_load_dwordx4 v[12:15], off
242; GCN-DAG: buffer_load_dwordx4 v[16:19], off
243; GCN-DAG: buffer_load_dwordx4 v[20:23], off
244; GCN-DAG: buffer_load_dwordx4 v[24:27], off
245; GCN-DAG: buffer_load_dwordx4 v[28:31], off
246; GCN-NEXT: s_setpc_b64
; Returns a <32 x i32> loaded through an addrspace(1) pointer that is itself
; fetched with a volatile load from an undef addrspace(4) pointer.
247define <32 x i32> @v32i32_func_void() #0 {
248  %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
249  %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
250  ret <32 x i32> %val
251}
252
253; GCN-LABEL: {{^}}v2i64_func_void:
254; GCN: buffer_load_dwordx4 v[0:3], off
255; GCN-NEXT: s_setpc_b64
; Returns a <2 x i64> loaded from an undef addrspace(1) pointer.
256define <2 x i64> @v2i64_func_void() #0 {
257  %val = load <2 x i64>, <2 x i64> addrspace(1)* undef
258  ret <2 x i64> %val
259}
260
261; GCN-LABEL: {{^}}v3i64_func_void:
262; GCN-DAG: buffer_load_dwordx4 v[0:3], off
263; GCN-DAG: buffer_load_dwordx4 v[4:7], off
264; GCN-NEXT: s_setpc_b64
; Returns a <3 x i64> loaded through an addrspace(1) pointer that is itself
; fetched with a volatile load from an undef addrspace(4) pointer.
265define <3 x i64> @v3i64_func_void() #0 {
266  %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
267  %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
268  ret <3 x i64> %val
269}
270
271; GCN-LABEL: {{^}}v4i64_func_void:
272; GCN: buffer_load_dwordx4 v[0:3], off
273; GCN: buffer_load_dwordx4 v[4:7], off
274; GCN-NEXT: s_setpc_b64
; Returns a <4 x i64> loaded through an addrspace(1) pointer that is itself
; fetched with a volatile load from an undef addrspace(4) pointer.
275define <4 x i64> @v4i64_func_void() #0 {
276  %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
277  %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
278  ret <4 x i64> %val
279}
280
281; GCN-LABEL: {{^}}v5i64_func_void:
282; GCN-DAG: buffer_load_dwordx4 v[0:3], off
283; GCN-DAG: buffer_load_dwordx4 v[4:7], off
284; GCN-DAG: buffer_load_dwordx4 v[8:11], off
285; GCN-NEXT: s_setpc_b64
; Returns a <5 x i64> loaded through an addrspace(1) pointer that is itself
; fetched with a volatile load from an undef addrspace(4) pointer.
286define <5 x i64> @v5i64_func_void() #0 {
287  %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
288  %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
289  ret <5 x i64> %val
290}
291
292; GCN-LABEL: {{^}}v8i64_func_void:
293; GCN-DAG: buffer_load_dwordx4 v[0:3], off
294; GCN-DAG: buffer_load_dwordx4 v[4:7], off
295; GCN-DAG: buffer_load_dwordx4 v[8:11], off
296; GCN-DAG: buffer_load_dwordx4 v[12:15], off
297; GCN-NEXT: s_setpc_b64
; Returns a <8 x i64> loaded through an addrspace(1) pointer that is itself
; fetched with a volatile load from an undef addrspace(4) pointer.
298define <8 x i64> @v8i64_func_void() #0 {
299  %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
300  %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
301  ret <8 x i64> %val
302}
303
304; GCN-LABEL: {{^}}v16i64_func_void:
305; GCN-DAG: buffer_load_dwordx4 v[0:3], off
306; GCN-DAG: buffer_load_dwordx4 v[4:7], off
307; GCN-DAG: buffer_load_dwordx4 v[8:11], off
308; GCN-DAG: buffer_load_dwordx4 v[12:15], off
309; GCN-DAG: buffer_load_dwordx4 v[16:19], off
310; GCN-DAG: buffer_load_dwordx4 v[20:23], off
311; GCN-DAG: buffer_load_dwordx4 v[24:27], off
312; GCN-DAG: buffer_load_dwordx4 v[28:31], off
313; GCN-NEXT: s_setpc_b64
; Returns a <16 x i64> loaded through an addrspace(1) pointer that is itself
; fetched with a volatile load from an undef addrspace(4) pointer.
314define <16 x i64> @v16i64_func_void() #0 {
315  %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
316  %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
317  ret <16 x i64> %val
318}
319
320; GCN-LABEL: {{^}}v2i16_func_void:
321; GFX9: buffer_load_dword v0, off
322; GFX9-NEXT: s_setpc_b64
; Returns a <2 x i16> loaded from an undef addrspace(1) pointer. Instruction
; expectations for this case are only written for the gfx900 run.
323define <2 x i16> @v2i16_func_void() #0 {
324  %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
325  ret <2 x i16> %val
326}
327
328; GCN-LABEL: {{^}}v3i16_func_void:
329; GFX9: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off
330; GFX9-NEXT: s_setpc_b64
; Returns a <3 x i16> loaded from an undef addrspace(1) pointer.
331define <3 x i16> @v3i16_func_void() #0 {
332  %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
333  ret <3 x i16> %val
334}
335
336; GCN-LABEL: {{^}}v4i16_func_void:
337; GFX9: buffer_load_dwordx2 v[0:1], off
338; GFX9-NEXT: s_setpc_b64
; Returns a <4 x i16> loaded from an undef addrspace(1) pointer.
339define <4 x i16> @v4i16_func_void() #0 {
340  %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
341  ret <4 x i16> %val
342}
343
344; GCN-LABEL: {{^}}v4f16_func_void:
345; GFX9: buffer_load_dwordx2 v[0:1], off
346; GFX9-NEXT: s_setpc_b64
; Returns a <4 x half> loaded from an undef addrspace(1) pointer.
347define <4 x half> @v4f16_func_void() #0 {
348  %val = load <4 x half>, <4 x half> addrspace(1)* undef
349  ret <4 x half> %val
350}
351
352; FIXME: Mixing buffer and global
353; FIXME: Should not scalarize
354; GCN-LABEL: {{^}}v5i16_func_void:
355; GFX9: buffer_load_dwordx2 v[0:1]
356; GFX9-NEXT: global_load_short_d16 v2
357; GFX9-NEXT: s_setpc_b64
; Returns a <5 x i16> loaded through an addrspace(1) pointer that is itself
; fetched with a volatile load from an undef addrspace(4) pointer.
358define <5 x i16> @v5i16_func_void() #0 {
359  %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
360  %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
361  ret <5 x i16> %val
362}
363
364; GCN-LABEL: {{^}}v8i16_func_void:
365; GFX9-DAG: buffer_load_dwordx4 v[0:3], off
366; GFX9-NEXT: s_setpc_b64
; Returns a <8 x i16> loaded through an addrspace(1) pointer that is itself
; fetched with a volatile load from an undef addrspace(4) pointer.
367define <8 x i16> @v8i16_func_void() #0 {
368  %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
369  %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
370  ret <8 x i16> %val
371}
372
373; GCN-LABEL: {{^}}v16i16_func_void:
374; GFX9: buffer_load_dwordx4 v[0:3], off
375; GFX9: buffer_load_dwordx4 v[4:7], off
376; GFX9-NEXT: s_setpc_b64
; Returns a <16 x i16> loaded through an addrspace(1) pointer that is itself
; fetched with a volatile load from an undef addrspace(4) pointer.
377define <16 x i16> @v16i16_func_void() #0 {
378  %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
379  %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
380  ret <16 x i16> %val
381}
382
383; FIXME: Should pack
384; GCN-LABEL: {{^}}v16i8_func_void:
385; GCN-DAG: v12
386; GCN-DAG: v13
387; GCN-DAG: v14
388; GCN-DAG: v15
; Returns a <16 x i8> loaded through an addrspace(1) pointer that is itself
; fetched with a volatile load from an undef addrspace(4) pointer. The
; expectations above only look for registers v12 through v15 being mentioned.
389define <16 x i8> @v16i8_func_void() #0 {
390  %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
391  %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
392  ret <16 x i8> %val
393}
394
395; FIXME: Should pack
396; GCN-LABEL: {{^}}v4i8_func_void:
397; GCN: buffer_load_dword v0
398; GCN-DAG: v_lshrrev_b32_e32 v1, 8, v0
399; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0
400; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0
401; GCN: s_setpc_b64
; Returns a <4 x i8> loaded through an addrspace(1) pointer that is itself
; fetched with a volatile load from an undef addrspace(4) pointer. The
; expectations above look for one dword load followed by right shifts of
; 8, 16 and 24 into v1, v2 and v3.
402define <4  x i8> @v4i8_func_void() #0 {
403  %ptr = load volatile <4  x i8> addrspace(1)*, <4  x i8> addrspace(1)* addrspace(4)* undef
404  %val = load <4  x i8>, <4  x i8> addrspace(1)* %ptr
405  ret <4  x i8> %val
406}
407
408; GCN-LABEL: {{^}}struct_i8_i32_func_void:
409; GCN-DAG: buffer_load_dword v1
410; GCN-DAG: buffer_load_ubyte v0
411; GCN-NEXT: s_setpc_b64
; Returns a { i8, i32 } struct by value, loaded as a whole aggregate from an
; undef addrspace(1) pointer.
412define {i8, i32} @struct_i8_i32_func_void() #0 {
413  %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
414  ret { i8, i32 } %val
415}
416
417; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32:
418; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]]
419; GCN: buffer_load_dword [[VAL1:v[0-9]+]]
420; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], 0 offen{{$}}
421; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], 0 offen offset:4{{$}}
; Writes a { i8, i32 } result through an explicit sret pointer in
; addrspace(5) instead of returning it by value.
422define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0) #0 {
; Both source values come from volatile loads of undef addrspace(1) pointers.
423  %val0 = load volatile i8, i8 addrspace(1)* undef
424  %val1 = load volatile i32, i32 addrspace(1)* undef
; Address field 0 (i8) and field 1 (i32) of the sret struct.
425  %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
426  %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
427  store i8 %val0, i8 addrspace(5)* %gep0
428  store i32 %val1, i32 addrspace(5)* %gep1
429  ret void
430}
431
432; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
433; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
434; AssertZext inserted. Not using it introduces the spills.
435
436; GCN-LABEL: {{^}}v33i32_func_void:
437; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
438; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:4{{$}}
439; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:8{{$}}
440; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:12{{$}}
441; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:16{{$}}
442; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:20{{$}}
443; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:24{{$}}
444; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:28{{$}}
445; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:32{{$}}
446; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:36{{$}}
447; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:40{{$}}
448; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:44{{$}}
449; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:48{{$}}
450; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:52{{$}}
451; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:56{{$}}
452; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:60{{$}}
453; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:64{{$}}
454; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:68{{$}}
455; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:72{{$}}
456; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:76{{$}}
457; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:80{{$}}
458; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:84{{$}}
459; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:88{{$}}
460; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:92{{$}}
461; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:96{{$}}
462; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:100{{$}}
463; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:104{{$}}
464; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:108{{$}}
465; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:112{{$}}
466; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:116{{$}}
467; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:120{{$}}
468; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:124{{$}}
469; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
470; GFX9-NEXT: s_setpc_b64
; Returns a <33 x i32> by value, loaded through an addrspace(1) pointer that is
; itself fetched with a volatile load from an undef addrspace(4) pointer. The
; gfx900 expectations above look for 33 dword stores addressed through v0 at
; offsets 0 through 128.
471define <33 x i32> @v33i32_func_void() #0 {
472  %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
473  %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
474  ret <33 x i32> %val
475}
476
477; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
478; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
479; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:4{{$}}
480; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:8{{$}}
481; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:12{{$}}
482; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:16{{$}}
483; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:20{{$}}
484; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:24{{$}}
485; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:28{{$}}
486; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:32{{$}}
487; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:36{{$}}
488; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:40{{$}}
489; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:44{{$}}
490; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:48{{$}}
491; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:52{{$}}
492; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:56{{$}}
493; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:60{{$}}
494; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:64{{$}}
495; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:68{{$}}
496; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:72{{$}}
497; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:76{{$}}
498; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:80{{$}}
499; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:84{{$}}
500; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:88{{$}}
501; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:92{{$}}
502; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:96{{$}}
503; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:100{{$}}
504; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:104{{$}}
505; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:108{{$}}
506; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:112{{$}}
507; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:116{{$}}
508; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:120{{$}}
509; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:124{{$}}
510; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
511; GFX9-NEXT: s_setpc_b64
; Returns a { <32 x i32>, i32 } struct by value, loaded through an addrspace(1)
; pointer that is itself fetched with a volatile load from an undef
; addrspace(4) pointer. The trailing i32 follows the 32-element vector.
512define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
513  %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
514  %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
515  ret { <32 x i32>, i32 }%val
516}
517
518; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
519; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen{{$}}
520; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:128{{$}}
521; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:132{{$}}
522; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:136{{$}}
523; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:140{{$}}
524; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:144{{$}}
525; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:148{{$}}
526; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:152{{$}}
527; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:156{{$}}
528; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:160{{$}}
529; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:164{{$}}
530; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:168{{$}}
531; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:172{{$}}
532; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:176{{$}}
533; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:180{{$}}
534; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:184{{$}}
535; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:188{{$}}
536; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:192{{$}}
537; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:196{{$}}
538; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:200{{$}}
539; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:204{{$}}
540; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:208{{$}}
541; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:212{{$}}
542; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:216{{$}}
543; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:220{{$}}
544; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:224{{$}}
545; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:228{{$}}
546; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:232{{$}}
547; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:236{{$}}
548; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:240{{$}}
549; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:244{{$}}
550; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:248{{$}}
551; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], 0 offen offset:252{{$}}
552; GFX9-NEXT: s_setpc_b64
; Returns a { i32, <32 x i32> } struct by value, loaded through an addrspace(1)
; pointer that is itself fetched with a volatile load from an undef
; addrspace(4) pointer. Here the i32 precedes the vector; the gfx900
; expectations above look for stores at offset 0 and then from offset 128 on.
553define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
554  %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
555  %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
556  ret { i32, <32 x i32> }%val
557}
558
559; Make sure the last struct component is returned in v3, not v4.
560; GCN-LABEL: {{^}}v3i32_struct_func_void_wasted_reg:
561; GCN: ds_read_b32 v0,
562; GCN: ds_read_b32 v1,
563; GCN: ds_read_b32 v2,
564; GCN: ds_read_b32 v3,
; Builds and returns a { <3 x i32>, i32 } from four separate volatile i32
; loads of undef addrspace(3) pointers.
565define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
566  %load0 = load volatile i32, i32 addrspace(3)* undef
567  %load1 = load volatile i32, i32 addrspace(3)* undef
568  %load2 = load volatile i32, i32 addrspace(3)* undef
569  %load3 = load volatile i32, i32 addrspace(3)* undef
570
; The first three loads fill vector lanes 0..2; the fourth becomes the
; struct's trailing i32 member.
571  %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0
572  %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1
573  %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2
574  %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0
575  %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1
576  ret { <3 x i32>, i32 } %insert.4
577}
578
579; GCN-LABEL: {{^}}v3f32_struct_func_void_wasted_reg:
580; GCN: ds_read_b32 v0,
581; GCN: ds_read_b32 v1,
582; GCN: ds_read_b32 v2,
583; GCN: ds_read_b32 v3,
; Builds and returns a { <3 x float>, i32 } from three volatile float loads and
; one volatile i32 load of undef addrspace(3) pointers. The expectations above
; look for the four reads landing in v0 through v3.
584define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
585  %load0 = load volatile float, float addrspace(3)* undef
586  %load1 = load volatile float, float addrspace(3)* undef
587  %load2 = load volatile float, float addrspace(3)* undef
588  %load3 = load volatile i32, i32 addrspace(3)* undef
589
; The three floats fill vector lanes 0..2; the i32 becomes the struct's
; trailing member.
590  %insert.0 = insertelement <3 x float> undef, float %load0, i32 0
591  %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1
592  %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2
593  %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0
594  %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1
595  ret { <3 x float>, i32 } %insert.4
596}
597
598; GCN-LABEL: {{^}}void_func_sret_max_known_zero_bits:
599; GCN: v_lshrrev_b32_e32 [[LSHR16:v[0-9]+]], 16, v0
600; GCN: ds_write_b32 {{v[0-9]+}}, [[LSHR16]]
601
602; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0
603; GCN: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
604; GCN-NEXT: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
; Converts the sret pointer (addrspace(5)) to an i32, shifts it right by 16,
; 17 and 18 bits, and stores each result with a volatile store to an undef
; addrspace(3) pointer. The expectations above look for a real shift only for
; the 16-bit case and a constant 0 being stored for the other two.
605define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret %arg0) #0 {
606  %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32
607
608  %lshr0 = lshr i32 %arg0.int, 16
609  %lshr1 = lshr i32 %arg0.int, 17
610  %lshr2 = lshr i32 %arg0.int, 18
611
; Volatile stores keep all three shifted values observable in the output.
612  store volatile i32 %lshr0, i32 addrspace(3)* undef
613  store volatile i32 %lshr1, i32 addrspace(3)* undef
614  store volatile i32 %lshr2, i32 addrspace(3)* undef
615  ret void
616}
617
; Attribute group referenced as #0 by the function definitions above.
618attributes #0 = { nounwind }
619