1; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
; atomicrmw volatile add on i64 flat pointers. Variants cover: a constant-offset
; GEP (_offset), a 64-bit kernel-arg index (_addr64), and a used result (_ret —
; expects the "glc" bit on the instruction and a flat_store_dwordx2 of the
; returned 64-bit pair). The {{$}} / \]$ anchors reject any trailing operand,
; i.e. no immediate offset modifier is expected on the flat instruction.
; GCN-LABEL: {{^}}atomic_add_i64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_add_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}}{{$}}
define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_addr64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
83
; atomicrmw volatile and on i64 flat pointers — expected to select
; flat_atomic_and_x2. Variants: constant-offset GEP (_offset), 64-bit index
; (_addr64), and used result (_ret — "glc" set, result stored via
; flat_store_dwordx2).
; GCN-LABEL: {{^}}atomic_and_i64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_addr64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
163
; atomicrmw volatile sub on i64 flat pointers — expected to select
; flat_atomic_sub_x2. Variants: constant-offset GEP (_offset), 64-bit index
; (_addr64), and used result (_ret — "glc" set, result stored via
; flat_store_dwordx2).
; GCN-LABEL: {{^}}atomic_sub_i64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
243
; atomicrmw volatile max (signed) on i64 flat pointers — expected to select the
; signed flavor flat_atomic_smax_x2 (unsigned umax is covered separately below).
; Variants: constant-offset GEP (_offset), 64-bit index (_addr64), and used
; result (_ret — "glc" set, result stored via flat_store_dwordx2).
; GCN-LABEL: {{^}}atomic_max_i64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_addr64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
323
; atomicrmw volatile umax (unsigned) on i64 flat pointers — expected to select
; flat_atomic_umax_x2 (signed max is covered above). Variants: constant-offset
; GEP (_offset), 64-bit index (_addr64), and used result (_ret — "glc" set,
; result stored via flat_store_dwordx2).
; GCN-LABEL: {{^}}atomic_umax_i64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
403
; atomicrmw volatile min (signed) on i64 flat pointers — expected to select the
; signed flavor flat_atomic_smin_x2 (unsigned umin is covered separately below).
; Variants: constant-offset GEP (_offset), 64-bit index (_addr64), and used
; result (_ret — "glc" set, result stored via flat_store_dwordx2).
; GCN-LABEL: {{^}}atomic_min_i64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_addr64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
483
; atomicrmw volatile umin (unsigned) on i64 flat pointers — expected to select
; flat_atomic_umin_x2 (signed min is covered above). Variants: constant-offset
; GEP (_offset), 64-bit index (_addr64), and used result (_ret — "glc" set,
; result stored via flat_store_dwordx2).
; GCN-LABEL: {{^}}atomic_umin_i64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_addr64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
563
; atomicrmw volatile or on i64 flat pointers — expected to select
; flat_atomic_or_x2. Variants: constant-offset GEP (_offset), 64-bit index
; (_addr64), and used result (_ret — "glc" set, result stored via
; flat_store_dwordx2).
; GCN-LABEL: {{^}}atomic_or_i64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_addr64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
643
; atomicrmw volatile xchg on 64-bit flat pointers — expected to select
; flat_atomic_swap_x2. Besides the usual i64 variants (constant-offset GEP
; _offset, 64-bit index _addr64, used result _ret with "glc" set and a
; flat_store_dwordx2 of the returned pair), xchg is also exercised on double
; and on i8* operands, which must lower to the same 64-bit swap.
; GCN-LABEL: {{^}}atomic_xchg_i64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_f64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_f64_offset(double* %out, double %in) {
entry:
  %gep = getelementptr double, double* %out, i64 4
  %tmp0 = atomicrmw volatile xchg double* %gep, double %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_pointer_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_pointer_offset(i8** %out, i8* %in) {
entry:
  %gep = getelementptr i8*, i8** %out, i32 4
  %val = atomicrmw volatile xchg i8** %gep, i8* %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_addr64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
741
742; GCN-LABEL: {{^}}atomic_xor_i64_offset:
743; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
744define amdgpu_kernel void @atomic_xor_i64_offset(i64* %out, i64 %in) {
745entry:
746  %gep = getelementptr i64, i64* %out, i64 4
747  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
748  ret void
749}
750
751; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset:
752; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
753; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
754define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
755entry:
756  %gep = getelementptr i64, i64* %out, i64 4
757  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
758  store i64 %tmp0, i64* %out2
759  ret void
760}
761
; xor through a dynamic index plus a constant offset; result unused (no glc).
; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
  ret void
}
771
; Dynamic index + constant offset, result live: glc form plus store of RET.
; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
783
; Plain 64-bit xor on the base pointer; result unused (no glc).
; GCN-LABEL: {{^}}atomic_xor_i64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xor_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
  ret void
}
791
; Plain 64-bit xor with a live result: glc form plus store of the old value.
; GCN-LABEL: {{^}}atomic_xor_i64_ret:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
801
; xor through a dynamically indexed pointer; result unused (no glc).
; GCN-LABEL: {{^}}atomic_xor_i64_addr64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xor_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
  ret void
}
810
; Indexed xor with a live result: glc form plus store of RET.
; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
821
; seq_cst 64-bit atomic load at a constant offset: must select
; flat_load_dwordx2 with glc, then store the loaded pair.
; GCN-LABEL: {{^}}atomic_load_i64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i64_offset(i64* %in, i64* %out) {
entry:
  %gep = getelementptr i64, i64* %in, i64 4
  %val = load atomic i64, i64* %gep  seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}
832
; seq_cst 64-bit atomic load from the base pointer: flat_load_dwordx2 glc.
; GCN-LABEL: {{^}}atomic_load_i64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i64(i64* %in, i64* %out) {
entry:
  %val = load atomic i64, i64* %in seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}
842
; Atomic load through dynamic index + constant offset: flat_load_dwordx2 glc.
; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64* %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %in, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %val = load atomic i64, i64* %gep seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}
854
; Atomic load through a dynamically indexed pointer: flat_load_dwordx2 glc.
; GCN-LABEL: {{^}}atomic_load_i64_addr64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_i64_addr64(i64* %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %in, i64 %index
  %val = load atomic i64, i64* %ptr seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}
865
; seq_cst 64-bit atomic store at a constant offset: flat_store_dwordx2.
; GCN-LABEL: {{^}}atomic_store_i64_offset:
; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64* %out) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  store atomic i64 %in, i64* %gep  seq_cst, align 8
  ret void
}
874
; seq_cst 64-bit atomic store to the base pointer: flat_store_dwordx2.
; GCN-LABEL: {{^}}atomic_store_i64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}]
define amdgpu_kernel void @atomic_store_i64(i64 %in, i64* %out) {
entry:
  store atomic i64 %in, i64* %out seq_cst, align 8
  ret void
}
882
; Atomic store through dynamic index + constant offset: flat_store_dwordx2.
; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  store atomic i64 %in, i64* %gep seq_cst, align 8
  ret void
}
892
; Atomic store through a dynamically indexed pointer: flat_store_dwordx2.
; GCN-LABEL: {{^}}atomic_store_i64_addr64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  store atomic i64 %in, i64* %ptr seq_cst, align 8
  ret void
}
901
; 64-bit cmpxchg at a constant offset; result unused, so no glc expected.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_offset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64* %out, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
910
; cmpxchg at a large constant offset (9000 elements) — too big to fold as an
; immediate, exercising address materialization before the atomic.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_soffset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64* %out, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 9000
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
919
; cmpxchg whose loaded value is extracted and stored: glc form, and the low
; register of the returned pair feeds the flat store.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]{{:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}
931
; cmpxchg through dynamic index + constant offset; result unused (no glc).
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
941
; Indexed + offset cmpxchg with live result: glc form, then store of the
; returned pair starting at the captured low register.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}
954
; Plain 64-bit cmpxchg on the base pointer; result unused (no glc).
; GCN-LABEL: {{^}}atomic_cmpxchg_i64:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64(i64* %out, i64 %in, i64 %old) {
entry:
  %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
962
; Plain cmpxchg with a live result: glc form plus store of the old value.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret:
; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
  %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}
973
; cmpxchg through a dynamically indexed pointer; result unused (no glc).
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64* %out, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
982
; Indexed cmpxchg with a live result: glc form plus store of the returned pair.
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
; GCN: flat_atomic_cmpswap_x2 v[[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[[[RET]]:
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}
994
; f64 atomic loads lower identically to i64: flat_load_dwordx2 with glc.
; GCN-LABEL: {{^}}atomic_load_f64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f64_offset(double* %in, double* %out) {
entry:
  %gep = getelementptr double, double* %in, i64 4
  %val = load atomic double, double* %gep  seq_cst, align 8
  store double %val, double* %out
  ret void
}
1005
; seq_cst f64 atomic load from the base pointer: flat_load_dwordx2 glc.
; GCN-LABEL: {{^}}atomic_load_f64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f64(double* %in, double* %out) {
entry:
  %val = load atomic double, double* %in seq_cst, align 8
  store double %val, double* %out
  ret void
}
1015
; f64 atomic load through dynamic index + constant offset: flat_load_dwordx2 glc.
; GCN-LABEL: {{^}}atomic_load_f64_addr64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f64_addr64_offset(double* %in, double* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double* %in, i64 %index
  %gep = getelementptr double, double* %ptr, i64 4
  %val = load atomic double, double* %gep seq_cst, align 8
  store double %val, double* %out
  ret void
}
1027
; f64 atomic load through a dynamically indexed pointer: flat_load_dwordx2 glc.
; GCN-LABEL: {{^}}atomic_load_f64_addr64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_load_f64_addr64(double* %in, double* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double* %in, i64 %index
  %val = load atomic double, double* %ptr seq_cst, align 8
  store double %val, double* %out
  ret void
}
1038
; f64 atomic stores lower identically to i64: flat_store_dwordx2.
; GCN-LABEL: {{^}}atomic_store_f64_offset:
; GCN: flat_store_dwordx2 [[RET:v\[[0-9]+:[0-9]\]]], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_f64_offset(double %in, double* %out) {
entry:
  %gep = getelementptr double, double* %out, i64 4
  store atomic double %in, double* %gep  seq_cst, align 8
  ret void
}
1047
; seq_cst f64 atomic store to the base pointer: flat_store_dwordx2.
; GCN-LABEL: {{^}}atomic_store_f64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]\]}}, v[{{[0-9]+}}:{{[0-9]+}}]
define amdgpu_kernel void @atomic_store_f64(double %in, double* %out) {
entry:
  store atomic double %in, double* %out seq_cst, align 8
  ret void
}
1055
; f64 atomic store through dynamic index + constant offset: flat_store_dwordx2.
; GCN-LABEL: {{^}}atomic_store_f64_addr64_offset:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, double* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double* %out, i64 %index
  %gep = getelementptr double, double* %ptr, i64 4
  store atomic double %in, double* %gep seq_cst, align 8
  ret void
}
1065
; f64 atomic store through a dynamically indexed pointer: flat_store_dwordx2.
; GCN-LABEL: {{^}}atomic_store_f64_addr64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
define amdgpu_kernel void @atomic_store_f64_addr64(double %in, double* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double* %out, i64 %index
  store atomic double %in, double* %ptr seq_cst, align 8
  ret void
}
1074