1; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=hawaii -verify-machineinstrs < %s | FileCheck  -enable-var-scope -check-prefixes=GCN,CI %s
2; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=fiji -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89 %s
3; RUN: llc -march=amdgcn -mtriple=amdgcn-- -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
4
5; GCN-LABEL: {{^}}i1_func_void:
6; GCN: buffer_load_ubyte v0, off
7; GCN-NEXT: s_waitcnt
8; GCN-NEXT: s_setpc_b64
; Returns an i1 loaded from an undef addrspace(1) pointer. The return value
; carries neither zeroext nor signext.
9define i1 @i1_func_void() #0 {
10  %val = load i1, i1 addrspace(1)* undef
11  ret i1 %val
12}
13
14; FIXME: Missing and to mask the loaded byte down to the single i1 bit?
15; GCN-LABEL: {{^}}i1_zeroext_func_void:
16; GCN: buffer_load_ubyte v0, off
17; GCN-NEXT: s_waitcnt vmcnt(0)
18; GCN-NEXT: s_setpc_b64
; Same body as the plain i1 case, but the return is marked zeroext.
19define zeroext i1 @i1_zeroext_func_void() #0 {
20  %val = load i1, i1 addrspace(1)* undef
21  ret i1 %val
22}
23
24; GCN-LABEL: {{^}}i1_signext_func_void:
25; GCN: buffer_load_ubyte v0, off
26; GCN-NEXT: s_waitcnt vmcnt(0)
27; GCN-NEXT: v_bfe_i32 v0, v0, 0, 1{{$}}
28; GCN-NEXT: s_setpc_b64
; Same body as the plain i1 case, but the return is marked signext.
29define signext i1 @i1_signext_func_void() #0 {
30  %val = load i1, i1 addrspace(1)* undef
31  ret i1 %val
32}
33
34; GCN-LABEL: {{^}}i8_func_void:
35; GCN: buffer_load_ubyte v0, off
36; GCN-NEXT: s_waitcnt vmcnt(0)
37; GCN-NEXT: s_setpc_b64
; Returns an i8 loaded from an undef addrspace(1) pointer, with no extension
; attribute on the return.
38define i8 @i8_func_void() #0 {
39  %val = load i8, i8 addrspace(1)* undef
40  ret i8 %val
41}
42
43; GCN-LABEL: {{^}}i8_zeroext_func_void:
44; GCN: buffer_load_ubyte v0, off
45; GCN-NEXT: s_waitcnt vmcnt(0)
46; GCN-NEXT: s_setpc_b64
; i8 return marked zeroext; the value is loaded from an undef addrspace(1)
; pointer.
47define zeroext i8 @i8_zeroext_func_void() #0 {
48  %val = load i8, i8 addrspace(1)* undef
49  ret i8 %val
50}
51
52; GCN-LABEL: {{^}}i8_signext_func_void:
53; GCN: buffer_load_sbyte v0, off
54; GCN-NEXT: s_waitcnt vmcnt(0)
55; GCN-NEXT: s_setpc_b64
; i8 return marked signext; the value is loaded from an undef addrspace(1)
; pointer.
56define signext i8 @i8_signext_func_void() #0 {
57  %val = load i8, i8 addrspace(1)* undef
58  ret i8 %val
59}
60
61; GCN-LABEL: {{^}}i16_func_void:
62; GCN: buffer_load_ushort v0, off
63; GCN-NEXT: s_waitcnt vmcnt(0)
64; GCN-NEXT: s_setpc_b64
; Returns an i16 loaded from an undef addrspace(1) pointer, with no extension
; attribute on the return.
65define i16 @i16_func_void() #0 {
66  %val = load i16, i16 addrspace(1)* undef
67  ret i16 %val
68}
69
70; GCN-LABEL: {{^}}i16_zeroext_func_void:
71; GCN: buffer_load_ushort v0, off
72; GCN-NEXT: s_waitcnt vmcnt(0)
73; GCN-NEXT: s_setpc_b64
; i16 return marked zeroext; the value is loaded from an undef addrspace(1)
; pointer.
74define zeroext i16 @i16_zeroext_func_void() #0 {
75  %val = load i16, i16 addrspace(1)* undef
76  ret i16 %val
77}
78
79; GCN-LABEL: {{^}}i16_signext_func_void:
80; GCN: buffer_load_sshort v0, off
81; GCN-NEXT: s_waitcnt vmcnt(0)
82; GCN-NEXT: s_setpc_b64
; i16 return marked signext; the value is loaded from an undef addrspace(1)
; pointer.
83define signext i16 @i16_signext_func_void() #0 {
84  %val = load i16, i16 addrspace(1)* undef
85  ret i16 %val
86}
87
88; GCN-LABEL: {{^}}i32_func_void:
89; GCN: buffer_load_dword v0, off
90; GCN-NEXT: s_waitcnt vmcnt(0)
91; GCN-NEXT: s_setpc_b64
; Returns an i32 loaded from an undef addrspace(1) pointer.
92define i32 @i32_func_void() #0 {
93  %val = load i32, i32 addrspace(1)* undef
94  ret i32 %val
95}
96
97; GCN-LABEL: {{^}}i48_func_void:
98; GCN: buffer_load_dword v0, off
99; GCN-NEXT: buffer_load_ushort v1, off
100; GCN-NEXT: s_waitcnt vmcnt(0)
101; GCN-NEXT: s_setpc_b64
; Returns an i48 loaded from an undef addrspace(1) pointer. This is the only
; scalar load in the file that spells out its alignment (align 8).
102define i48 @i48_func_void() #0 {
103  %val = load i48, i48 addrspace(1)* undef, align 8
104  ret i48 %val
105}
106
107; GCN-LABEL: {{^}}i64_func_void:
108; GCN: buffer_load_dwordx2 v[0:1], off
109; GCN-NEXT: s_waitcnt vmcnt(0)
110; GCN-NEXT: s_setpc_b64
; Returns an i64 loaded from an undef addrspace(1) pointer.
111define i64 @i64_func_void() #0 {
112  %val = load i64, i64 addrspace(1)* undef
113  ret i64 %val
114}
115
116; GCN-LABEL: {{^}}i65_func_void:
117; GCN-DAG: buffer_load_dwordx2 v[0:1], off
118; GCN-DAG: buffer_load_ubyte v2, off
119; GCN: s_waitcnt vmcnt(0)
120; GCN-NEXT: s_setpc_b64
; Returns an i65 (one bit wider than i64) loaded from an undef addrspace(1)
; pointer.
121define i65 @i65_func_void() #0 {
122  %val = load i65, i65 addrspace(1)* undef
123  ret i65 %val
124}
125
126; GCN-LABEL: {{^}}f32_func_void:
127; GCN: buffer_load_dword v0, off, s[4:7], 0
128; GCN-NEXT: s_waitcnt vmcnt(0)
129; GCN-NEXT: s_setpc_b64
; Returns a float loaded from an undef addrspace(1) pointer.
130define float @f32_func_void() #0 {
131  %val = load float, float addrspace(1)* undef
132  ret float %val
133}
134
135; GCN-LABEL: {{^}}f64_func_void:
136; GCN: buffer_load_dwordx2 v[0:1], off
137; GCN-NEXT: s_waitcnt vmcnt(0)
138; GCN-NEXT: s_setpc_b64
; Returns a double loaded from an undef addrspace(1) pointer.
139define double @f64_func_void() #0 {
140  %val = load double, double addrspace(1)* undef
141  ret double %val
142}
143
144; GCN-LABEL: {{^}}v2f64_func_void:
145; GCN: buffer_load_dwordx4 v[0:3], off
146; GCN-NEXT: s_waitcnt vmcnt(0)
147; GCN-NEXT: s_setpc_b64
; Returns a <2 x double> loaded from an undef addrspace(1) pointer.
148define <2 x double> @v2f64_func_void() #0 {
149  %val = load <2 x double>, <2 x double> addrspace(1)* undef
150  ret <2 x double> %val
151}
152
153; GCN-LABEL: {{^}}v2i32_func_void:
154; GCN: buffer_load_dwordx2 v[0:1], off
155; GCN-NEXT: s_waitcnt vmcnt(0)
156; GCN-NEXT: s_setpc_b64
; Returns a <2 x i32> loaded from an undef addrspace(1) pointer.
157define <2 x i32> @v2i32_func_void() #0 {
158  %val = load <2 x i32>, <2 x i32> addrspace(1)* undef
159  ret <2 x i32> %val
160}
161
162; GCN-LABEL: {{^}}v3i32_func_void:
163; GCN: buffer_load_dwordx3 v[0:2], off
164; GCN-NEXT: s_waitcnt vmcnt(0)
165; GCN-NEXT: s_setpc_b64
; Returns a <3 x i32> loaded from an undef addrspace(1) pointer.
166define <3 x i32> @v3i32_func_void() #0 {
167  %val = load <3 x i32>, <3 x i32> addrspace(1)* undef
168  ret <3 x i32> %val
169}
170
171; GCN-LABEL: {{^}}v4i32_func_void:
172; GCN: buffer_load_dwordx4 v[0:3], off
173; GCN-NEXT: s_waitcnt vmcnt(0)
174; GCN-NEXT: s_setpc_b64
; Returns a <4 x i32> loaded from an undef addrspace(1) pointer.
175define <4 x i32> @v4i32_func_void() #0 {
176  %val = load <4 x i32>, <4 x i32> addrspace(1)* undef
177  ret <4 x i32> %val
178}
179
180; GCN-LABEL: {{^}}v5i32_func_void:
181; GCN-DAG: buffer_load_dword v4, off
182; GCN-DAG: buffer_load_dwordx4 v[0:3], off
183; GCN: s_waitcnt vmcnt(0)
184; GCN-NEXT: s_setpc_b64
; Returns a <5 x i32>. Unlike the smaller vector cases, the data load itself is
; volatile and reads directly from an undef addrspace(1) pointer.
185define <5 x i32> @v5i32_func_void() #0 {
186  %val = load volatile <5 x i32>, <5 x i32> addrspace(1)* undef
187  ret <5 x i32> %val
188}
189
190; GCN-LABEL: {{^}}v8i32_func_void:
191; GCN-DAG: buffer_load_dwordx4 v[0:3], off
192; GCN-DAG: buffer_load_dwordx4 v[4:7], off
193; GCN: s_waitcnt vmcnt(0)
194; GCN-NEXT: s_setpc_b64
; Returns a <8 x i32>. The addrspace(1) data pointer is first fetched with a
; volatile load from an undef addrspace(4) address, then dereferenced.
195define <8 x i32> @v8i32_func_void() #0 {
196  %ptr = load volatile <8 x i32> addrspace(1)*, <8 x i32> addrspace(1)* addrspace(4)* undef
197  %val = load <8 x i32>, <8 x i32> addrspace(1)* %ptr
198  ret <8 x i32> %val
199}
200
201; GCN-LABEL: {{^}}v16i32_func_void:
202; GCN-DAG: buffer_load_dwordx4 v[0:3], off
203; GCN-DAG: buffer_load_dwordx4 v[4:7], off
204; GCN-DAG: buffer_load_dwordx4 v[8:11], off
205; GCN-DAG: buffer_load_dwordx4 v[12:15], off
206; GCN: s_waitcnt vmcnt(0)
207; GCN-NEXT: s_setpc_b64
; Returns a <16 x i32>. The addrspace(1) data pointer is first fetched with a
; volatile load from an undef addrspace(4) address, then dereferenced.
208define <16 x i32> @v16i32_func_void() #0 {
209  %ptr = load volatile <16 x i32> addrspace(1)*, <16 x i32> addrspace(1)* addrspace(4)* undef
210  %val = load <16 x i32>, <16 x i32> addrspace(1)* %ptr
211  ret <16 x i32> %val
212}
213
214; GCN-LABEL: {{^}}v32i32_func_void:
215; GCN-DAG: buffer_load_dwordx4 v[0:3], off
216; GCN-DAG: buffer_load_dwordx4 v[4:7], off
217; GCN-DAG: buffer_load_dwordx4 v[8:11], off
218; GCN-DAG: buffer_load_dwordx4 v[12:15], off
219; GCN-DAG: buffer_load_dwordx4 v[16:19], off
220; GCN-DAG: buffer_load_dwordx4 v[20:23], off
221; GCN-DAG: buffer_load_dwordx4 v[24:27], off
222; GCN-DAG: buffer_load_dwordx4 v[28:31], off
223; GCN: s_waitcnt vmcnt(0)
224; GCN-NEXT: s_setpc_b64
; Returns a <32 x i32>. The expectations preceding this function look for the
; result in v[0:3] through v[28:31]. The data pointer comes from a volatile
; load of an undef addrspace(4) address.
225define <32 x i32> @v32i32_func_void() #0 {
226  %ptr = load volatile <32 x i32> addrspace(1)*, <32 x i32> addrspace(1)* addrspace(4)* undef
227  %val = load <32 x i32>, <32 x i32> addrspace(1)* %ptr
228  ret <32 x i32> %val
229}
230
231; GCN-LABEL: {{^}}v2i64_func_void:
232; GCN: buffer_load_dwordx4 v[0:3], off
233; GCN-NEXT: s_waitcnt vmcnt(0)
234; GCN-NEXT: s_setpc_b64
; Returns a <2 x i64> loaded from an undef addrspace(1) pointer.
235define <2 x i64> @v2i64_func_void() #0 {
236  %val = load <2 x i64>, <2 x i64> addrspace(1)* undef
237  ret <2 x i64> %val
238}
239
240; GCN-LABEL: {{^}}v3i64_func_void:
241; GCN-DAG: buffer_load_dwordx4 v[0:3], off
242; GCN-DAG: buffer_load_dwordx4 v[4:7], off
243; GCN: s_waitcnt vmcnt(0)
244; GCN-NEXT: s_setpc_b64
; Returns a <3 x i64>. The addrspace(1) data pointer is first fetched with a
; volatile load from an undef addrspace(4) address, then dereferenced.
245define <3 x i64> @v3i64_func_void() #0 {
246  %ptr = load volatile <3 x i64> addrspace(1)*, <3 x i64> addrspace(1)* addrspace(4)* undef
247  %val = load <3 x i64>, <3 x i64> addrspace(1)* %ptr
248  ret <3 x i64> %val
249}
250
251; GCN-LABEL: {{^}}v4i64_func_void:
252; GCN: buffer_load_dwordx4 v[0:3], off
253; GCN: buffer_load_dwordx4 v[4:7], off
254; GCN-NEXT: s_waitcnt vmcnt(0)
255; GCN-NEXT: s_setpc_b64
; Returns a <4 x i64>. The addrspace(1) data pointer is first fetched with a
; volatile load from an undef addrspace(4) address, then dereferenced.
256define <4 x i64> @v4i64_func_void() #0 {
257  %ptr = load volatile <4 x i64> addrspace(1)*, <4 x i64> addrspace(1)* addrspace(4)* undef
258  %val = load <4 x i64>, <4 x i64> addrspace(1)* %ptr
259  ret <4 x i64> %val
260}
261
262; GCN-LABEL: {{^}}v5i64_func_void:
263; GCN-DAG: buffer_load_dwordx4 v[0:3], off
264; GCN-DAG: buffer_load_dwordx4 v[4:7], off
265; GCN-DAG: buffer_load_dwordx4 v[8:11], off
266; GCN: s_waitcnt vmcnt(0)
267; GCN-NEXT: s_setpc_b64
; Returns a <5 x i64>. The addrspace(1) data pointer is first fetched with a
; volatile load from an undef addrspace(4) address, then dereferenced.
268define <5 x i64> @v5i64_func_void() #0 {
269  %ptr = load volatile <5 x i64> addrspace(1)*, <5 x i64> addrspace(1)* addrspace(4)* undef
270  %val = load <5 x i64>, <5 x i64> addrspace(1)* %ptr
271  ret <5 x i64> %val
272}
273
274; GCN-LABEL: {{^}}v8i64_func_void:
275; GCN-DAG: buffer_load_dwordx4 v[0:3], off
276; GCN-DAG: buffer_load_dwordx4 v[4:7], off
277; GCN-DAG: buffer_load_dwordx4 v[8:11], off
278; GCN-DAG: buffer_load_dwordx4 v[12:15], off
279; GCN: s_waitcnt vmcnt(0)
280; GCN-NEXT: s_setpc_b64
; Returns a <8 x i64>. The addrspace(1) data pointer is first fetched with a
; volatile load from an undef addrspace(4) address, then dereferenced.
281define <8 x i64> @v8i64_func_void() #0 {
282  %ptr = load volatile <8 x i64> addrspace(1)*, <8 x i64> addrspace(1)* addrspace(4)* undef
283  %val = load <8 x i64>, <8 x i64> addrspace(1)* %ptr
284  ret <8 x i64> %val
285}
286
287; GCN-LABEL: {{^}}v16i64_func_void:
288; GCN-DAG: buffer_load_dwordx4 v[0:3], off
289; GCN-DAG: buffer_load_dwordx4 v[4:7], off
290; GCN-DAG: buffer_load_dwordx4 v[8:11], off
291; GCN-DAG: buffer_load_dwordx4 v[12:15], off
292; GCN-DAG: buffer_load_dwordx4 v[16:19], off
293; GCN-DAG: buffer_load_dwordx4 v[20:23], off
294; GCN-DAG: buffer_load_dwordx4 v[24:27], off
295; GCN-DAG: buffer_load_dwordx4 v[28:31], off
296; GCN: s_waitcnt vmcnt(0)
297; GCN-NEXT: s_setpc_b64
; Returns a <16 x i64>. The expectations preceding this function look for the
; result in v[0:3] through v[28:31]. The data pointer comes from a volatile
; load of an undef addrspace(4) address.
298define <16 x i64> @v16i64_func_void() #0 {
299  %ptr = load volatile <16 x i64> addrspace(1)*, <16 x i64> addrspace(1)* addrspace(4)* undef
300  %val = load <16 x i64>, <16 x i64> addrspace(1)* %ptr
301  ret <16 x i64> %val
302}
303
304; GCN-LABEL: {{^}}v2i16_func_void:
305; GFX9: buffer_load_dword v0, off
306; GFX9-NEXT: s_waitcnt vmcnt(0)
307; GFX9-NEXT: s_setpc_b64
; Returns a <2 x i16> loaded from an undef addrspace(1) pointer. Only the
; GFX9-prefixed expectations above constrain the output for this function.
308define <2 x i16> @v2i16_func_void() #0 {
309  %val = load <2 x i16>, <2 x i16> addrspace(1)* undef
310  ret <2 x i16> %val
311}
312
313; GCN-LABEL: {{^}}v3i16_func_void:
314; GFX9: buffer_load_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, off
315; GFX9-NEXT: s_waitcnt vmcnt(0)
316; GFX9-NEXT: s_setpc_b64
; Returns a <3 x i16> loaded from an undef addrspace(1) pointer.
317define <3 x i16> @v3i16_func_void() #0 {
318  %val = load <3 x i16>, <3 x i16> addrspace(1)* undef
319  ret <3 x i16> %val
320}
321
322; GCN-LABEL: {{^}}v4i16_func_void:
323; GFX9: buffer_load_dwordx2 v[0:1], off
324; GFX9-NEXT: s_waitcnt vmcnt(0)
325; GFX9-NEXT: s_setpc_b64
; Returns a <4 x i16> loaded from an undef addrspace(1) pointer.
326define <4 x i16> @v4i16_func_void() #0 {
327  %val = load <4 x i16>, <4 x i16> addrspace(1)* undef
328  ret <4 x i16> %val
329}
330
331; GCN-LABEL: {{^}}v4f16_func_void:
332; GFX9: buffer_load_dwordx2 v[0:1], off
333; GFX9-NEXT: s_waitcnt vmcnt(0)
334; GFX9-NEXT: s_setpc_b64
; Returns a <4 x half> loaded from an undef addrspace(1) pointer; the
; floating-point counterpart of the <4 x i16> case.
335define <4 x half> @v4f16_func_void() #0 {
336  %val = load <4 x half>, <4 x half> addrspace(1)* undef
337  ret <4 x half> %val
338}
339
340; FIXME: Mixing buffer and global
341; FIXME: Should not scalarize
342; GCN-LABEL: {{^}}v5i16_func_void:
343; GFX9: buffer_load_dwordx2 v[0:1]
344; GFX9-NEXT: global_load_short_d16 v2
345; GFX9-NEXT: s_waitcnt
346; GFX9-NEXT: s_setpc_b64
; Returns a <5 x i16>. The addrspace(1) data pointer is first fetched with a
; volatile load from an undef addrspace(4) address, then dereferenced.
347define <5 x i16> @v5i16_func_void() #0 {
348  %ptr = load volatile <5 x i16> addrspace(1)*, <5 x i16> addrspace(1)* addrspace(4)* undef
349  %val = load <5 x i16>, <5 x i16> addrspace(1)* %ptr
350  ret <5 x i16> %val
351}
352
353; GCN-LABEL: {{^}}v8i16_func_void:
354; GFX9-DAG: buffer_load_dwordx4 v[0:3], off
355; GFX9: s_waitcnt vmcnt(0)
356; GFX9-NEXT: s_setpc_b64
; Returns a <8 x i16>. The addrspace(1) data pointer is first fetched with a
; volatile load from an undef addrspace(4) address, then dereferenced.
357define <8 x i16> @v8i16_func_void() #0 {
358  %ptr = load volatile <8 x i16> addrspace(1)*, <8 x i16> addrspace(1)* addrspace(4)* undef
359  %val = load <8 x i16>, <8 x i16> addrspace(1)* %ptr
360  ret <8 x i16> %val
361}
362
363; GCN-LABEL: {{^}}v16i16_func_void:
364; GFX9: buffer_load_dwordx4 v[0:3], off
365; GFX9: buffer_load_dwordx4 v[4:7], off
366; GFX9: s_waitcnt vmcnt(0)
367; GFX9-NEXT: s_setpc_b64
; Returns a <16 x i16>. The addrspace(1) data pointer is first fetched with a
; volatile load from an undef addrspace(4) address, then dereferenced.
368define <16 x i16> @v16i16_func_void() #0 {
369  %ptr = load volatile <16 x i16> addrspace(1)*, <16 x i16> addrspace(1)* addrspace(4)* undef
370  %val = load <16 x i16>, <16 x i16> addrspace(1)* %ptr
371  ret <16 x i16> %val
372}
373
374; FIXME: Should pack
375; GCN-LABEL: {{^}}v16i8_func_void:
376; GCN-DAG: v12
377; GCN-DAG: v13
378; GCN-DAG: v14
379; GCN-DAG: v15
; Returns a <16 x i8>. The expectations above only look for v12..v15 being
; mentioned somewhere in the output. The data pointer comes from a volatile
; load of an undef addrspace(4) address.
380define <16 x i8> @v16i8_func_void() #0 {
381  %ptr = load volatile <16 x i8> addrspace(1)*, <16 x i8> addrspace(1)* addrspace(4)* undef
382  %val = load <16 x i8>, <16 x i8> addrspace(1)* %ptr
383  ret <16 x i8> %val
384}
385
386; FIXME: Should pack
387; GCN-LABEL: {{^}}v4i8_func_void:
388; GCN: buffer_load_dword v0
389; GCN-DAG: v_lshrrev_b32_e32 v1, 8, v0
390; GCN-DAG: v_lshrrev_b32_e32 v2, 16, v0
391; GCN-DAG: v_lshrrev_b32_e32 v3, 24, v0
392; GCN: s_setpc_b64
; Returns a <4 x i8>. The expectations above look for one dword load into v0
; followed by right shifts of 8, 16 and 24 into v1..v3. The data pointer comes
; from a volatile load of an undef addrspace(4) address.
393define <4  x i8> @v4i8_func_void() #0 {
394  %ptr = load volatile <4  x i8> addrspace(1)*, <4  x i8> addrspace(1)* addrspace(4)* undef
395  %val = load <4  x i8>, <4  x i8> addrspace(1)* %ptr
396  ret <4  x i8> %val
397}
398
399; GCN-LABEL: {{^}}struct_i8_i32_func_void:
400; GCN-DAG: buffer_load_dword v1
401; GCN-DAG: buffer_load_ubyte v0
402; GCN: s_waitcnt vmcnt(0)
403; GCN-NEXT: s_setpc_b64
; Returns a { i8, i32 } struct by value, loaded as a whole from an undef
; addrspace(1) pointer.
404define {i8, i32} @struct_i8_i32_func_void() #0 {
405  %val = load { i8, i32 }, { i8, i32 } addrspace(1)* undef
406  ret { i8, i32 } %val
407}
408
409; GCN-LABEL: {{^}}void_func_sret_struct_i8_i32:
410; GCN: buffer_load_ubyte [[VAL0:v[0-9]+]]
411; GCN: buffer_load_dword [[VAL1:v[0-9]+]]
412; GCN: buffer_store_byte [[VAL0]], v0, s[0:3], s33 offen{{$}}
413; GCN: buffer_store_dword [[VAL1]], v0, s[0:3], s33 offen offset:4{{$}}
; Explicit-sret variant: the { i8, i32 } result is written through the
; addrspace(5) pointer %arg0 instead of being returned by value.
414define void @void_func_sret_struct_i8_i32({ i8, i32 } addrspace(5)* sret %arg0) #0 {
; Both field values come from volatile loads of undef addrspace(1) pointers.
415  %val0 = load volatile i8, i8 addrspace(1)* undef
416  %val1 = load volatile i32, i32 addrspace(1)* undef
; Addresses of struct field 0 (the i8) and field 1 (the i32) inside %arg0.
417  %gep0 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 0
418  %gep1 = getelementptr inbounds { i8, i32 }, { i8, i32 } addrspace(5)* %arg0, i32 0, i32 1
419  store i8 %val0, i8 addrspace(5)* %gep0
420  store i32 %val1, i32 addrspace(5)* %gep1
421  ret void
422}
423
424; FIXME: Should be able to fold offsets in all of these pre-gfx9. Call
425; lowering introduces an extra CopyToReg/CopyFromReg obscuring the
426; AssertZext inserted. Not using it introduces the spills.
427
428; GCN-LABEL: {{^}}v33i32_func_void:
429; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen{{$}}
430; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:4{{$}}
431; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:8{{$}}
432; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:12{{$}}
433; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:16{{$}}
434; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:20{{$}}
435; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:24{{$}}
436; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:28{{$}}
437; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:32{{$}}
438; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:36{{$}}
439; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:40{{$}}
440; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:44{{$}}
441; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:48{{$}}
442; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:52{{$}}
443; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:56{{$}}
444; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:60{{$}}
445; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:64{{$}}
446; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:68{{$}}
447; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:72{{$}}
448; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:76{{$}}
449; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:80{{$}}
450; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:84{{$}}
451; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:88{{$}}
452; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:92{{$}}
453; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:96{{$}}
454; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:100{{$}}
455; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:104{{$}}
456; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:108{{$}}
457; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:112{{$}}
458; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:116{{$}}
459; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:120{{$}}
460; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:124{{$}}
461; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:128{{$}}
462; GFX9: s_waitcnt vmcnt(0)
463; GFX9-NEXT: s_setpc_b64
; Returns a <33 x i32>, one element more than the <32 x i32> case. The
; expectations above look for 33 dword stores addressed through v0 (offsets 0
; to 128) rather than a register return.
464define <33 x i32> @v33i32_func_void() #0 {
465  %ptr = load volatile <33 x i32> addrspace(1)*, <33 x i32> addrspace(1)* addrspace(4)* undef
466  %val = load <33 x i32>, <33 x i32> addrspace(1)* %ptr
467  ret <33 x i32> %val
468}
469
470; GCN-LABEL: {{^}}struct_v32i32_i32_func_void:
471; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen{{$}}
472; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:4{{$}}
473; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:8{{$}}
474; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:12{{$}}
475; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:16{{$}}
476; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:20{{$}}
477; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:24{{$}}
478; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:28{{$}}
479; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:32{{$}}
480; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:36{{$}}
481; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:40{{$}}
482; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:44{{$}}
483; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:48{{$}}
484; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:52{{$}}
485; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:56{{$}}
486; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:60{{$}}
487; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:64{{$}}
488; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:68{{$}}
489; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:72{{$}}
490; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:76{{$}}
491; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:80{{$}}
492; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:84{{$}}
493; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:88{{$}}
494; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:92{{$}}
495; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:96{{$}}
496; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:100{{$}}
497; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:104{{$}}
498; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:108{{$}}
499; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:112{{$}}
500; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:116{{$}}
501; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:120{{$}}
502; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:124{{$}}
503; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:128{{$}}
504; GFX9: s_waitcnt vmcnt(0)
505; GFX9-NEXT: s_setpc_b64
; Returns { <32 x i32>, i32 }: a 32-element vector followed by one scalar. The
; expectations above look for dword stores through v0 at offsets 0 to 128.
506define { <32 x i32>, i32 } @struct_v32i32_i32_func_void() #0 {
507  %ptr = load volatile { <32 x i32>, i32 } addrspace(1)*, { <32 x i32>, i32 } addrspace(1)* addrspace(4)* undef
508  %val = load { <32 x i32>, i32 }, { <32 x i32>, i32 } addrspace(1)* %ptr
509  ret { <32 x i32>, i32 }%val
510}
511
512; GCN-LABEL: {{^}}struct_i32_v32i32_func_void:
513; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen{{$}}
514; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:128{{$}}
515; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:132{{$}}
516; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:136{{$}}
517; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:140{{$}}
518; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:144{{$}}
519; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:148{{$}}
520; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:152{{$}}
521; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:156{{$}}
522; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:160{{$}}
523; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:164{{$}}
524; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:168{{$}}
525; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:172{{$}}
526; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:176{{$}}
527; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:180{{$}}
528; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:184{{$}}
529; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:188{{$}}
530; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:192{{$}}
531; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:196{{$}}
532; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:200{{$}}
533; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:204{{$}}
534; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:208{{$}}
535; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:212{{$}}
536; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:216{{$}}
537; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:220{{$}}
538; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:224{{$}}
539; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:228{{$}}
540; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:232{{$}}
541; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:236{{$}}
542; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:240{{$}}
543; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:244{{$}}
544; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:248{{$}}
545; GFX9-DAG: buffer_store_dword v{{[0-9]+}}, v0, s[0:3], s33 offen offset:252{{$}}
546; GFX9: s_waitcnt vmcnt(0)
547; GFX9-NEXT: s_setpc_b64
; Returns { i32, <32 x i32> }: the scalar comes first. The expectations above
; look for a store at offset 0 and then stores at offsets 128 to 252.
548define { i32, <32 x i32> } @struct_i32_v32i32_func_void() #0 {
549  %ptr = load volatile { i32, <32 x i32> } addrspace(1)*, { i32, <32 x i32> } addrspace(1)* addrspace(4)* undef
550  %val = load { i32, <32 x i32> }, { i32, <32 x i32> } addrspace(1)* %ptr
551  ret { i32, <32 x i32> }%val
552}
553
554; Make sure the last struct component is returned in v3, not v4.
555; GCN-LABEL: {{^}}v3i32_struct_func_void_wasted_reg:
556; GCN: ds_read_b32 v0,
557; GCN: ds_read_b32 v1,
558; GCN: ds_read_b32 v2,
559; GCN: ds_read_b32 v3,
; Builds a { <3 x i32>, i32 } from four separate volatile i32 loads and returns
; it by value.
560define { <3 x i32>, i32 } @v3i32_struct_func_void_wasted_reg() #0 {
; Four volatile loads from an undef addrspace(3) pointer, one per returned
; component.
561  %load0 = load volatile i32, i32 addrspace(3)* undef
562  %load1 = load volatile i32, i32 addrspace(3)* undef
563  %load2 = load volatile i32, i32 addrspace(3)* undef
564  %load3 = load volatile i32, i32 addrspace(3)* undef
565
; The first three loads fill the vector lanes 0..2.
566  %insert.0 = insertelement <3 x i32> undef, i32 %load0, i32 0
567  %insert.1 = insertelement <3 x i32> %insert.0, i32 %load1, i32 1
568  %insert.2 = insertelement <3 x i32> %insert.1, i32 %load2, i32 2
; The vector becomes struct field 0 and the fourth load becomes field 1.
569  %insert.3 = insertvalue { <3 x i32>, i32 } undef, <3 x i32> %insert.2, 0
570  %insert.4 = insertvalue { <3 x i32>, i32 } %insert.3, i32 %load3, 1
571  ret { <3 x i32>, i32 } %insert.4
572}
573
574; GCN-LABEL: {{^}}v3f32_struct_func_void_wasted_reg:
575; GCN: ds_read_b32 v0,
576; GCN: ds_read_b32 v1,
577; GCN: ds_read_b32 v2,
578; GCN: ds_read_b32 v3,
; Floating-point counterpart of the <3 x i32> struct case: builds a
; { <3 x float>, i32 } from three float loads and one i32 load.
579define { <3 x float>, i32 } @v3f32_struct_func_void_wasted_reg() #0 {
; Volatile loads from an undef addrspace(3) pointer: three floats for the
; vector lanes and one i32 for the trailing scalar.
580  %load0 = load volatile float, float addrspace(3)* undef
581  %load1 = load volatile float, float addrspace(3)* undef
582  %load2 = load volatile float, float addrspace(3)* undef
583  %load3 = load volatile i32, i32 addrspace(3)* undef
584
; The three float loads fill the vector lanes 0..2.
585  %insert.0 = insertelement <3 x float> undef, float %load0, i32 0
586  %insert.1 = insertelement <3 x float> %insert.0, float %load1, i32 1
587  %insert.2 = insertelement <3 x float> %insert.1, float %load2, i32 2
; The vector becomes struct field 0 and the i32 load becomes field 1.
588  %insert.3 = insertvalue { <3 x float>, i32 } undef, <3 x float> %insert.2, 0
589  %insert.4 = insertvalue { <3 x float>, i32 } %insert.3, i32 %load3, 1
590  ret { <3 x float>, i32 } %insert.4
591}
592
593; GCN-LABEL: {{^}}void_func_sret_max_known_zero_bits:
594; GCN: v_lshrrev_b32_e32 [[LSHR16:v[0-9]+]], 16, v0
595; GCN: ds_write_b32 {{v[0-9]+}}, [[LSHR16]]
596
597; GCN: v_mov_b32_e32 [[HIGH_BITS:v[0-9]+]], 0
598; GCN: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
599; GCN-NEXT: ds_write_b32 {{v[0-9]+}}, [[HIGH_BITS]]
; Converts the incoming sret pointer to an integer and stores it shifted right
; by 16, 17 and 18 bits. The expectations above look for a real shift only in
; the 16-bit case; the other two stores are expected to write a register set
; to 0.
600define void @void_func_sret_max_known_zero_bits(i8 addrspace(5)* sret %arg0) #0 {
601  %arg0.int = ptrtoint i8 addrspace(5)* %arg0 to i32
602
603  %lshr0 = lshr i32 %arg0.int, 16
604  %lshr1 = lshr i32 %arg0.int, 17
605  %lshr2 = lshr i32 %arg0.int, 18
606
; Volatile stores to an undef addrspace(3) pointer, so all three stores stay
; in the output.
607  store volatile i32 %lshr0, i32 addrspace(3)* undef
608  store volatile i32 %lshr1, i32 addrspace(3)* undef
609  store volatile i32 %lshr2, i32 addrspace(3)* undef
610  ret void
611}
612
; Attribute group referenced as #0 by every function defined above.
613attributes #0 = { nounwind }
614