; RUN: llc -march=amdgcn -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI,SIVI %s
; RUN: llc -march=amdgcn -mcpu=tonga -amdgpu-atomic-optimizations=false -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI,SIVI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-atomic-optimizations=false -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX9 %s

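; Codegen tests for 32-bit global (addrspace 1) atomics: SI selects MUBUF
; buffer instructions (addr64 for register indices), VI selects flat
; instructions, and GFX9 selects global instructions with a signed
; immediate offset.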
; GCN-LABEL: {{^}}atomic_add_i32_offset:
; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

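; -1024 i32 elements is -4096 bytes, which should be the most negative
; immediate offset a GFX9 global instruction can still encode (13-bit
; signed field).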
; GCN-LABEL: {{^}}atomic_add_i32_max_neg_offset:
; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-4096{{$}}
define amdgpu_kernel void @atomic_add_i32_max_neg_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 -1024
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

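; 9000 i32 elements is 36000 bytes (0x8ca0). That is too large for the
; immediate offset field, so SIVI materialize it in the SGPR soffset
; operand, while GFX9 splits it into a 0x8000 VGPR base plus a 3232-byte
; immediate (0x8000 + 3232 = 36000).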
; GCN-LABEL: {{^}}atomic_add_i32_soffset:
; SIVI: s_mov_b32 [[SREG:s[0-9]+]], 0x8ca0
; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], [[SREG]]{{$}}

; GFX9: v_mov_b32_e32 [[OFFSET:v[0-9]+]], 0x8000{{$}}
; GFX9: global_atomic_add [[OFFSET]], v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:3232{{$}}
define amdgpu_kernel void @atomic_add_i32_soffset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 9000
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

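; 47224239175595 i32 elements is 0xabcd0000deac bytes, beyond any immediate
; addressing mode: SI carries the full 64-bit address in a VGPR pair
; (addr64), VI falls back to flat, and GFX9 folds the constant into the
; SGPR base with s_add_u32/s_addc_u32.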
; GCN-LABEL: {{^}}atomic_add_i32_huge_offset:
; SI-DAG: v_mov_b32_e32 v[[PTRLO:[0-9]+]], 0xdeac
; SI-DAG: v_mov_b32_e32 v[[PTRHI:[0-9]+]], 0xabcd
; SI: buffer_atomic_add v{{[0-9]+}}, v[[[PTRLO]]:[[PTRHI]]], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}

; VI: flat_atomic_add

; GFX9: s_add_u32 s[[LOW_K:[0-9]+]], s{{[0-9]+}}, 0xdeac
; GFX9: s_addc_u32 s[[HIGH_K:[0-9]+]], s{{[0-9]+}}, 0xabcd
; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[[[LOW_K]]:[[HIGH_K]]]{{$}}
define amdgpu_kernel void @atomic_add_i32_huge_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 47224239175595

  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

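; The _ret variants use the value the atomic returns, so the instructions
; are selected with glc set (return the pre-op memory value) and the result
; is stored to a second pointer.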
; GCN-LABEL: {{^}}atomic_add_i32_ret_offset:
; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
define amdgpu_kernel void @atomic_add_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

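; With a register index, SI folds it into buffer addr64 addressing. VI flat
; instructions have no immediate offset field, so the 16-byte offset is
; pre-added into the address; GFX9 keeps the SGPR base and still folds the
; offset.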
; GCN-LABEL: {{^}}atomic_add_i32_addr64_offset:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64_offset:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s
define amdgpu_kernel void @atomic_add_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile add i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32:
; SIVI: buffer_atomic_add v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret:
; SIVI: buffer_atomic_add [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GFX9: global_store_dword v{{[0-9]+}}, [[RET]], s
define amdgpu_kernel void @atomic_add_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile add i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_addr64:
; SI: buffer_atomic_add v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_add v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_add v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_add_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i32_ret_addr64:
; SI: buffer_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_add [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_add [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_add_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile add i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_offset:
; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_offset:
; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_addr64_offset:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64_offset:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile and i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32:
; SIVI: buffer_atomic_and v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret:
; SIVI: buffer_atomic_and [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile and i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_addr64:
; SI: buffer_atomic_and v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_and v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_and v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_and_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i32_ret_addr64:
; SI: buffer_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_and [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_and [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_and_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile and i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_offset:
; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_offset:
; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16 glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64_offset:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64_offset:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile sub i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32:
; SIVI: buffer_atomic_sub v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_sub_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret:
; SIVI: buffer_atomic_sub [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile sub i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_addr64:
; SI: buffer_atomic_sub v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_sub v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_sub v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_sub_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i32_ret_addr64:
; SI: buffer_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_sub [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_sub [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_sub_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile sub i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

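; Signed and unsigned min/max select the _smax/_umax/_smin/_umin forms of
; the same addressing patterns.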
; GCN-LABEL: {{^}}atomic_max_i32_offset:
; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_offset:
; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_addr64_offset:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64_offset:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile max i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32:
; SIVI: buffer_atomic_smax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_max_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret:
; SIVI: buffer_atomic_smax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile max i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_addr64:
; SI: buffer_atomic_smax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}

; GFX9: global_atomic_smax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_max_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i32_ret_addr64:
; SI: buffer_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_max_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile max i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_offset:
; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_offset:
; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64_offset:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64_offset:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umax i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32:
; SIVI: buffer_atomic_umax v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_umax_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret:
; SIVI: buffer_atomic_umax [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umax i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_addr64:
; SI: buffer_atomic_umax v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umax v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umax v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_umax_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i32_ret_addr64:
; SI: buffer_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umax [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umax [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_umax_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umax i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_offset:
; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_offset:
; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_addr64_offset:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_min_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64_offset:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile min i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32:
; SIVI: buffer_atomic_smin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_min_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret:
; SIVI: buffer_atomic_smin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile min i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_addr64:
; SI: buffer_atomic_smin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_smin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_smin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_min_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i32_ret_addr64:
; SI: buffer_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_smin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_smin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_min_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile min i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_offset:
; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_offset:
; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_addr64_offset:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_umin_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64_offset:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile umin i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32:
; SIVI: buffer_atomic_umin v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_umin_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret:
; SIVI: buffer_atomic_umin [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile umin i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_addr64:
; SI: buffer_atomic_umin v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_umin v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_umin v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_umin_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i32_ret_addr64:
; SI: buffer_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_umin [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_umin [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_umin_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile umin i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_offset:
; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_or_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_offset:
; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_or_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_addr64_offset:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_or_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64_offset:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_or_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile or i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32:
; SIVI: buffer_atomic_or v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}

; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_or_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret:
; SIVI: buffer_atomic_or [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_or_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile or i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_addr64:
; SI: buffer_atomic_or v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_or v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_or v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_or_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i32_ret_addr64:
; SI: buffer_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_or [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_or [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_or_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile or i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_offset:
; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_offset(i32 addrspace(1)* %out, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

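; xchg is a bitwise operation, so an f32 exchange selects the same swap
; instructions as the i32 case.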
; GCN-LABEL: {{^}}atomic_xchg_f32_offset:
; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}

; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_f32_offset(float addrspace(1)* %out, float %in) {
entry:
  %gep = getelementptr float, float addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xchg float addrspace(1)* %gep, float %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_offset:
; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_xchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_addr64_offset:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64_offset:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = atomicrmw volatile xchg i32 addrspace(1)* %gep, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32:
; SIVI: buffer_atomic_swap v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_xchg_i32(i32 addrspace(1)* %out, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret:
; SIVI: buffer_atomic_swap [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_xchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
entry:
  %val = atomicrmw volatile xchg i32 addrspace(1)* %out, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_addr64:
; SI: buffer_atomic_swap v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_swap v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_swap v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_xchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i32_ret_addr64:
; SI: buffer_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_swap [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_swap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_xchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xchg i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

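; cmpxchg passes the data and compare values to _cmpswap as a consecutive
; VGPR pair; the loaded value comes back in the first register of the pair.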
942; GCN-LABEL: {{^}}atomic_cmpxchg_i32_offset:
943; SIVI: buffer_atomic_cmpswap v[{{[0-9]+}}:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
944
945; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
946define amdgpu_kernel void @atomic_cmpxchg_i32_offset(i32 addrspace(1)* %out, i32 %in, i32 %old) {
947entry:
948  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
949  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
950  ret void
951}
952
953; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_offset:
954; SIVI: buffer_atomic_cmpswap v[[[RET:[0-9]+]]{{:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
955; SIVI: buffer_store_dword v[[RET]]
956
957; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
958define amdgpu_kernel void @atomic_cmpxchg_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
959entry:
960  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
961  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
962  %extract0 = extractvalue { i32, i1 } %val, 0
963  store i32 %extract0, i32 addrspace(1)* %out2
964  ret void
965}
966
967; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64_offset:
968; SI: buffer_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
969
970; VI: flat_atomic_cmpswap v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
971; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+\:[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
972define amdgpu_kernel void @atomic_cmpxchg_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
973entry:
974  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
975  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
976  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
977  ret void
978}
979
980; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64_offset:
981; SI: buffer_atomic_cmpswap v[[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
982; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
983; SIVI: buffer_store_dword v[[RET]]
984
985; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
986define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
987entry:
988  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
989  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
990  %val = cmpxchg volatile i32 addrspace(1)* %gep, i32 %old, i32 %in seq_cst seq_cst
991  %extract0 = extractvalue { i32, i1 } %val, 0
992  store i32 %extract0, i32 addrspace(1)* %out2
993  ret void
994}
995
996; GCN-LABEL: {{^}}atomic_cmpxchg_i32:
997; SIVI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
998
999; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}}
1000define amdgpu_kernel void @atomic_cmpxchg_i32(i32 addrspace(1)* %out, i32 %in, i32 %old) {
1001entry:
1002  %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
1003  ret void
1004}
1005
1006; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret:
1007; SIVI: buffer_atomic_cmpswap v[[[RET:[0-9]+]]:{{[0-9]+}}], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1008; SIVI: buffer_store_dword v[[RET]]
1009
1010; GFX9: global_atomic_cmpswap [[RET:v[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
1011define amdgpu_kernel void @atomic_cmpxchg_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i32 %old) {
1012entry:
1013  %val = cmpxchg volatile i32 addrspace(1)* %out, i32 %old, i32 %in seq_cst seq_cst
1014  %extract0 = extractvalue { i32, i1 } %val, 0
1015  store i32 %extract0, i32 addrspace(1)* %out2
1016  ret void
1017}
1018
1019; GCN-LABEL: {{^}}atomic_cmpxchg_i32_addr64:
1020; SI: buffer_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
1021; VI: flat_atomic_cmpswap v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
1022; GFX9: global_atomic_cmpswap v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}]{{$}}
1023define amdgpu_kernel void @atomic_cmpxchg_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index, i32 %old) {
1024entry:
1025  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1026  %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
1027  ret void
1028}
1029
1030; GCN-LABEL: {{^}}atomic_cmpxchg_i32_ret_addr64:
1031; SI: buffer_atomic_cmpswap v[[[RET:[0-9]+]]:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
1032; VI: flat_atomic_cmpswap v[[RET:[0-9]+]], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
1033; SIVI: buffer_store_dword v[[RET]]
1034
1035; GFX9: global_atomic_cmpswap v[[RET:[0-9]+]], v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] glc{{$}}
1036define amdgpu_kernel void @atomic_cmpxchg_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index, i32 %old) {
1037entry:
1038  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1039  %val = cmpxchg volatile i32 addrspace(1)* %ptr, i32 %old, i32 %in seq_cst seq_cst
1040  %extract0 = extractvalue { i32, i1 } %val, 0
1041  store i32 %extract0, i32 addrspace(1)* %out2
1042  ret void
1043}
1044
1045; GCN-LABEL: {{^}}atomic_xor_i32_offset:
1046; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
1047
1048; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
1049define amdgpu_kernel void @atomic_xor_i32_offset(i32 addrspace(1)* %out, i32 %in) {
1050entry:
1051  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
1052  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1053  ret void
1054}
1055
1056; GCN-LABEL: {{^}}atomic_xor_i32_ret_offset:
1057; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
1058; SIVI: buffer_store_dword [[RET]]
1059
1060; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
1061define amdgpu_kernel void @atomic_xor_i32_ret_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
1062entry:
1063  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
1064  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1065  store i32 %val, i32 addrspace(1)* %out2
1066  ret void
1067}
1068
1069; GCN-LABEL: {{^}}atomic_xor_i32_addr64_offset:
1070; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
1071; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
1072; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16{{$}}
1073define amdgpu_kernel void @atomic_xor_i32_addr64_offset(i32 addrspace(1)* %out, i32 %in, i64 %index) {
1074entry:
1075  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1076  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1077  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1078  ret void
1079}
1080
1081; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64_offset:
1082; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
1083; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
1084; SIVI: buffer_store_dword [[RET]]
1085
1086; GFX9: global_atomic_xor [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
1087define amdgpu_kernel void @atomic_xor_i32_ret_addr64_offset(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
1088entry:
1089  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
1090  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
1091  %val = atomicrmw volatile xor i32 addrspace(1)* %gep, i32 %in seq_cst
1092  store i32 %val, i32 addrspace(1)* %out2
1093  ret void
1094}
1095
1096; GCN-LABEL: {{^}}atomic_xor_i32:
1097; SIVI: buffer_atomic_xor v{{[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
1098; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
1099define amdgpu_kernel void @atomic_xor_i32(i32 addrspace(1)* %out, i32 %in) {
1100entry:
1101  %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
1102  ret void
1103}
1104
1105; GCN-LABEL: {{^}}atomic_xor_i32_ret:
1106; SIVI: buffer_atomic_xor [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
1107; SIVI: buffer_store_dword [[RET]]
1108
1109; GFX9: global_atomic_xor [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} glc{{$}}
1110define amdgpu_kernel void @atomic_xor_i32_ret(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in) {
1111entry:
1112  %val = atomicrmw volatile xor i32 addrspace(1)* %out, i32 %in seq_cst
1113  store i32 %val, i32 addrspace(1)* %out2
1114  ret void
1115}
1116
; GCN-LABEL: {{^}}atomic_xor_i32_addr64:
; SI: buffer_atomic_xor v{{[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_atomic_xor v[{{[0-9]+:[0-9]+}}], v{{[0-9]+$}}
; GFX9: global_atomic_xor v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_xor_i32_addr64(i32 addrspace(1)* %out, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xor_i32_ret_addr64:
; SI: buffer_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_atomic_xor [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}], v{{[0-9]+}} glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_atomic_xor [[RET:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_xor_i32_ret_addr64(i32 addrspace(1)* %out, i32 addrspace(1)* %out2, i32 %in, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %val = atomicrmw volatile xor i32 addrspace(1)* %ptr, i32 %in seq_cst
  store i32 %val, i32 addrspace(1)* %out2
  ret void
}

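; The remaining tests cover seq_cst atomic loads and stores. Atomic loads
; are expected to select to ordinary load instructions with glc set so the
; L1 cache is bypassed; the waitcnt/cache-invalidate instructions the memory
; legalizer inserts around them are not checked here.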
; GCN-LABEL: {{^}}atomic_load_i32_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_i32_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %in, i64 4
  %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

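; Negative offsets cannot be folded everywhere: the MUBUF immediate offset
; on SI is unsigned, so SI recomputes the address and uses addr64; VI flat
; instructions have no offset field at all, so the -512 is added into the
; scalar base with s_add_u32/s_addc_u32; only GFX9's signed 13-bit global
; offset can absorb it directly.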
; GCN-LABEL: {{^}}atomic_load_i32_negoffset:
; SI: buffer_load_dword [[RET:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}

; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00
; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-512 glc{{$}}
define amdgpu_kernel void @atomic_load_i32_negoffset(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %in, i64 -128
  %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

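; f32 atomic loads are expected to select exactly like i32: the in-memory
; width is what matters, so the same glc dword load is checked.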
; GCN-LABEL: {{^}}atomic_load_f32_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_f32_offset(float addrspace(1)* %in, float addrspace(1)* %out) {
entry:
  %gep = getelementptr float, float addrspace(1)* %in, i64 4
  %val = load atomic float, float addrspace(1)* %gep seq_cst, align 4
  store float %val, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i32:
; SI: buffer_load_dword [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 glc
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] glc
define amdgpu_kernel void @atomic_load_i32(i32 addrspace(1)* %in, i32 addrspace(1)* %out) {
entry:
  %val = load atomic i32, i32 addrspace(1)* %in seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i32_addr64_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_i32_addr64_offset(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  %val = load atomic i32, i32 addrspace(1)* %gep seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i32_addr64:
; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] glc{{$}}
define amdgpu_kernel void @atomic_load_i32_addr64(i32 addrspace(1)* %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %in, i64 %index
  %val = load atomic i32, i32 addrspace(1)* %ptr seq_cst, align 4
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_f32_addr64_offset:
; SI: buffer_load_dword [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16 glc{{$}}
; VI: flat_load_dword [[RET:v[0-9]+]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; SIVI: buffer_store_dword [[RET]]

; GFX9: global_load_dword [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_f32_addr64_offset(float addrspace(1)* %in, float addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr float, float addrspace(1)* %in, i64 %index
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4
  %val = load atomic float, float addrspace(1)* %gep seq_cst, align 4
  store float %val, float addrspace(1)* %out
  ret void
}

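; Atomic stores are expected to lower to ordinary store instructions; the
; release ordering comes from an s_waitcnt emitted before the store, which
; these checks deliberately do not match.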
; GCN-LABEL: {{^}}atomic_store_i32_offset:
; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_i32_offset(i32 %in, i32 addrspace(1)* %out) {
entry:
  %gep = getelementptr i32, i32 addrspace(1)* %out, i64 4
  store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i32:
; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_store_i32(i32 %in, i32 addrspace(1)* %out) {
entry:
  store atomic i32 %in, i32 addrspace(1)* %out seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_f32:
; SI: buffer_store_dword {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword {{v[0-9]+}}, {{v[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_f32(float %in, float addrspace(1)* %out) {
entry:
  store atomic float %in, float addrspace(1)* %out seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i32_addr64_offset:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64_offset(i32 %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  %gep = getelementptr i32, i32 addrspace(1)* %ptr, i64 4
  store atomic i32 %in, i32 addrspace(1)* %gep seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_f32_addr64_offset:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 offset:16{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16{{$}}
define amdgpu_kernel void @atomic_store_f32_addr64_offset(float %in, float addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr float, float addrspace(1)* %out, i64 %index
  %gep = getelementptr float, float addrspace(1)* %ptr, i64 4
  store atomic float %in, float addrspace(1)* %gep seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i32_addr64:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i32_addr64(i32 %in, i32 addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr i32, i32 addrspace(1)* %out, i64 %index
  store atomic i32 %in, i32 addrspace(1)* %ptr seq_cst, align 4
  ret void
}

; GCN-LABEL: {{^}}atomic_store_f32_addr64:
; SI: buffer_store_dword {{v[0-9]+}}, v[{{[0-9]+}}:{{[0-9]+}}], s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64{{$}}
; VI: flat_store_dword v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_dword v{{[0-9]+}}, v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_f32_addr64(float %in, float addrspace(1)* %out, i64 %index) {
entry:
  %ptr = getelementptr float, float addrspace(1)* %out, i64 %index
  store atomic float %in, float addrspace(1)* %ptr seq_cst, align 4
  ret void
}

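; Sub-dword (i8/i16) atomic loads and stores only require natural alignment
; and are expected to select to the corresponding byte/short memory
; instructions; a single naturally aligned access needs no widening or
; masking.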
; GCN-LABEL: {{^}}atomic_load_i8_offset:
; SIVI: buffer_load_ubyte [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_byte [[RET]]

; GFX9: global_load_ubyte [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_i8_offset(i8 addrspace(1)* %in, i8 addrspace(1)* %out) {
entry:
  %gep = getelementptr i8, i8 addrspace(1)* %in, i64 16
  %val = load atomic i8, i8 addrspace(1)* %gep seq_cst, align 1
  store i8 %val, i8 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i8_negoffset:
; SI: buffer_load_ubyte [[RET:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}

; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00
; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
; VI: flat_load_ubyte [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}

; GFX9: global_load_ubyte [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-512 glc{{$}}
define amdgpu_kernel void @atomic_load_i8_negoffset(i8 addrspace(1)* %in, i8 addrspace(1)* %out) {
entry:
  %gep = getelementptr i8, i8 addrspace(1)* %in, i64 -512
  %val = load atomic i8, i8 addrspace(1)* %gep seq_cst, align 1
  store i8 %val, i8 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i8_offset:
; SI: buffer_store_byte {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
; VI: flat_store_byte v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_byte {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_i8_offset(i8 %in, i8 addrspace(1)* %out) {
entry:
  %gep = getelementptr i8, i8 addrspace(1)* %out, i64 16
  store atomic i8 %in, i8 addrspace(1)* %gep seq_cst, align 1
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i8:
; SI: buffer_store_byte {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; VI: flat_store_byte v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_byte {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_store_i8(i8 %in, i8 addrspace(1)* %out) {
entry:
  store atomic i8 %in, i8 addrspace(1)* %out seq_cst, align 1
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i16_offset:
; SIVI: buffer_load_ushort [[RET:v[0-9]+]], off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16 glc{{$}}
; SIVI: buffer_store_short [[RET]]

; GFX9: global_load_ushort [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:16 glc{{$}}
define amdgpu_kernel void @atomic_load_i16_offset(i16 addrspace(1)* %in, i16 addrspace(1)* %out) {
entry:
  %gep = getelementptr i16, i16 addrspace(1)* %in, i64 8
  %val = load atomic i16, i16 addrspace(1)* %gep seq_cst, align 2
  store i16 %val, i16 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_load_i16_negoffset:
; SI: buffer_load_ushort [[RET:v[0-9]+]], v{{\[[0-9]+:[0-9]+\]}}, s[{{[0-9]+}}:{{[0-9]+}}], 0 addr64 glc{{$}}

; VI: s_add_u32 s{{[0-9]+}}, s{{[0-9]+}}, 0xfffffe00
; VI-NEXT: s_addc_u32 s{{[0-9]+}}, s{{[0-9]+}}, -1
; VI: flat_load_ushort [[RET:v[0-9]+]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}

; GFX9: global_load_ushort [[RET:v[0-9]+]], v{{[0-9]+}}, s[{{[0-9]+}}:{{[0-9]+}}] offset:-512 glc{{$}}
define amdgpu_kernel void @atomic_load_i16_negoffset(i16 addrspace(1)* %in, i16 addrspace(1)* %out) {
entry:
  %gep = getelementptr i16, i16 addrspace(1)* %in, i64 -256
  %val = load atomic i16, i16 addrspace(1)* %gep seq_cst, align 2
  store i16 %val, i16 addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i16_offset:
; SI: buffer_store_short {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
; VI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_short {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_i16_offset(i16 %in, i16 addrspace(1)* %out) {
entry:
  %gep = getelementptr i16, i16 addrspace(1)* %out, i64 8
  store atomic i16 %in, i16 addrspace(1)* %gep seq_cst, align 2
  ret void
}

; GCN-LABEL: {{^}}atomic_store_i16:
; SI: buffer_store_short {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; VI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_short {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_store_i16(i16 %in, i16 addrspace(1)* %out) {
entry:
  store atomic i16 %in, i16 addrspace(1)* %out seq_cst, align 2
  ret void
}

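; As with f32, atomic stores of half reuse the integer path and are expected
; to emit the same short store as i16.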
; GCN-LABEL: {{^}}atomic_store_f16_offset:
; SI: buffer_store_short {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0 offset:16{{$}}
; VI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_short {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]}} offset:16{{$}}
define amdgpu_kernel void @atomic_store_f16_offset(half %in, half addrspace(1)* %out) {
entry:
  %gep = getelementptr half, half addrspace(1)* %out, i64 8
  store atomic half %in, half addrspace(1)* %gep seq_cst, align 2
  ret void
}

; GCN-LABEL: {{^}}atomic_store_f16:
; SI: buffer_store_short {{v[0-9]+}}, off, s[{{[0-9]+}}:{{[0-9]+}}], 0{{$}}
; VI: flat_store_short v[{{[0-9]+}}:{{[0-9]+}}], {{v[0-9]+$}}
; GFX9: global_store_short {{v[0-9]+}}, {{v[0-9]+}}, s{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_store_f16(half %in, half addrspace(1)* %out) {
entry:
  store atomic half %in, half addrspace(1)* %out seq_cst, align 2
  ret void
}
