1; RUN: llc -march=amdgcn -mcpu=bonaire -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
2; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s
3
; --- 64-bit flat atomic add ---------------------------------------------
; Covers the plain, return-value (glc), register-indexed (addr64), and
; constant-offset addressing variants of atomicrmw add on flat pointers.
; The `\]$` / `{{$}}` anchors assert no trailing offset/glc modifier is
; printed beyond what each pattern expects.

; GCN-LABEL: {{^}}atomic_add_i64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_addr64_offset:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64_offset:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile add i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile add i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_addr64:
; GCN: flat_atomic_add_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_add_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_add_i64_ret_addr64:
; GCN: flat_atomic_add_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_add_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile add i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
83
; --- 64-bit flat atomic and ---------------------------------------------
; Same matrix of variants as the add tests: plain, return-value (glc),
; addr64 (register-indexed) and constant-offset addressing.

; GCN-LABEL: {{^}}atomic_and_i64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_addr64_offset:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64_offset:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile and i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile and i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_addr64:
; GCN: flat_atomic_and_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_and_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_and_i64_ret_addr64:
; GCN: flat_atomic_and_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_and_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile and i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
163
; --- 64-bit flat atomic sub ---------------------------------------------
; Plain, return-value (glc), addr64 and constant-offset variants.

; GCN-LABEL: {{^}}atomic_sub_i64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64_offset:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64_offset:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile sub i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile sub i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_addr64:
; GCN: flat_atomic_sub_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_sub_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_sub_i64_ret_addr64:
; GCN: flat_atomic_sub_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_sub_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile sub i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
243
; --- 64-bit flat atomic signed max --------------------------------------
; atomicrmw max (signed) must select the smax instruction, not umax.

; GCN-LABEL: {{^}}atomic_max_i64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_addr64_offset:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64_offset:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile max i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile max i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_addr64:
; GCN: flat_atomic_smax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_max_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_max_i64_ret_addr64:
; GCN: flat_atomic_smax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_max_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile max i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
323
; --- 64-bit flat atomic unsigned max ------------------------------------
; atomicrmw umax must select the umax instruction, not signed smax.

; GCN-LABEL: {{^}}atomic_umax_i64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64_offset:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64_offset:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umax i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umax i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_addr64:
; GCN: flat_atomic_umax_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umax_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umax_i64_ret_addr64:
; GCN: flat_atomic_umax_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umax_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umax i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
403
; --- 64-bit flat atomic signed min --------------------------------------
; atomicrmw min (signed) must select the smin instruction, not umin.

; GCN-LABEL: {{^}}atomic_min_i64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_addr64_offset:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64_offset:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile min i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile min i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_addr64:
; GCN: flat_atomic_smin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_min_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_min_i64_ret_addr64:
; GCN: flat_atomic_smin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_min_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile min i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
483
; --- 64-bit flat atomic unsigned min ------------------------------------
; atomicrmw umin must select the umin instruction, not signed smin.

; GCN-LABEL: {{^}}atomic_umin_i64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_addr64_offset:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64_offset:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile umin i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile umin i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_addr64:
; GCN: flat_atomic_umin_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_umin_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_umin_i64_ret_addr64:
; GCN: flat_atomic_umin_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_umin_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile umin i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
563
; --- 64-bit flat atomic or ----------------------------------------------
; Plain, return-value (glc), addr64 and constant-offset variants.

; GCN-LABEL: {{^}}atomic_or_i64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_addr64_offset:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64_offset:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile or i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile or i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_addr64:
; GCN: flat_atomic_or_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_or_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_or_i64_ret_addr64:
; GCN: flat_atomic_or_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_or_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile or i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
643
; --- 64-bit flat atomic exchange ----------------------------------------
; atomicrmw xchg selects flat_atomic_swap_x2; also checks the f64 form
; (xchg is the only atomicrmw operation that accepts FP operands here).

; GCN-LABEL: {{^}}atomic_xchg_i64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_offset(i64* %out, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_f64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_f64_offset(double* %out, double %in) {
entry:
  %gep = getelementptr double, double* %out, i64 4
  %tmp0 = atomicrmw volatile xchg double* %gep, double %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_addr64_offset:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64_offset:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xchg i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xchg i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_addr64:
; GCN: flat_atomic_swap_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
define amdgpu_kernel void @atomic_xchg_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
  ret void
}

; GCN-LABEL: {{^}}atomic_xchg_i64_ret_addr64:
; GCN: flat_atomic_swap_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
define amdgpu_kernel void @atomic_xchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xchg i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
732
733; GCN-LABEL: {{^}}atomic_xor_i64_offset:
734; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
735define amdgpu_kernel void @atomic_xor_i64_offset(i64* %out, i64 %in) {
736entry:
737  %gep = getelementptr i64, i64* %out, i64 4
738  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
739  ret void
740}
741
742; GCN-LABEL: {{^}}atomic_xor_i64_ret_offset:
743; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
744; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
745define amdgpu_kernel void @atomic_xor_i64_ret_offset(i64* %out, i64* %out2, i64 %in) {
746entry:
747  %gep = getelementptr i64, i64* %out, i64 4
748  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
749  store i64 %tmp0, i64* %out2
750  ret void
751}
752
; GCN-LABEL: {{^}}atomic_xor_i64_addr64_offset:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; Register index plus constant offset; result unused, so no glc.
define amdgpu_kernel void @atomic_xor_i64_addr64_offset(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
  ret void
}
762
; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64_offset:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Register index + constant offset, used result: glc required, old value stored.
define amdgpu_kernel void @atomic_xor_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %tmp0 = atomicrmw volatile xor i64* %gep, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
774
; GCN-LABEL: {{^}}atomic_xor_i64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; Plain pointer, dead result: no offset addressing and no glc.
define amdgpu_kernel void @atomic_xor_i64(i64* %out, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
  ret void
}
782
; GCN-LABEL: {{^}}atomic_xor_i64_ret:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Used result: glc required; the returned old value is stored to %out2.
define amdgpu_kernel void @atomic_xor_i64_ret(i64* %out, i64* %out2, i64 %in) {
entry:
  %tmp0 = atomicrmw volatile xor i64* %out, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
792
; GCN-LABEL: {{^}}atomic_xor_i64_addr64:
; GCN: flat_atomic_xor_x2 v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]$}}
; Register index only; result unused, so no glc.
define amdgpu_kernel void @atomic_xor_i64_addr64(i64* %out, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
  ret void
}
801
; GCN-LABEL: {{^}}atomic_xor_i64_ret_addr64:
; GCN: flat_atomic_xor_x2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}], v{{\[[0-9]+:[0-9]+\]}} glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Register index, used result: glc required, old value stored to %out2.
define amdgpu_kernel void @atomic_xor_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %tmp0 = atomicrmw volatile xor i64* %ptr, i64 %in seq_cst
  store i64 %tmp0, i64* %out2
  ret void
}
812
; GCN-LABEL: {{^}}atomic_load_i64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; seq_cst atomic load must be emitted with glc; the loaded value is stored.
define amdgpu_kernel void @atomic_load_i64_offset(i64* %in, i64* %out) {
entry:
  %gep = getelementptr i64, i64* %in, i64 4
  %val = load atomic i64, i64* %gep seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}
823
; GCN-LABEL: {{^}}atomic_load_i64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; seq_cst atomic load with no offset: still requires glc.
define amdgpu_kernel void @atomic_load_i64(i64* %in, i64* %out) {
entry:
  %val = load atomic i64, i64* %in seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}
833
; GCN-LABEL: {{^}}atomic_load_i64_addr64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Register index + constant offset; seq_cst load requires glc.
define amdgpu_kernel void @atomic_load_i64_addr64_offset(i64* %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %in, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %val = load atomic i64, i64* %gep seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}
845
; GCN-LABEL: {{^}}atomic_load_i64_addr64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Register index only; seq_cst load requires glc.
define amdgpu_kernel void @atomic_load_i64_addr64(i64* %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %in, i64 %index
  %val = load atomic i64, i64* %ptr seq_cst, align 8
  store i64 %val, i64* %out
  ret void
}
856
; GCN-LABEL: {{^}}atomic_store_i64_offset:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; seq_cst atomic store is a plain flat store (no glc on the instruction).
define amdgpu_kernel void @atomic_store_i64_offset(i64 %in, i64* %out) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  store atomic i64 %in, i64* %gep seq_cst, align 8
  ret void
}
865
; GCN-LABEL: {{^}}atomic_store_i64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}]
; seq_cst atomic store with no offset: plain flat store.
define amdgpu_kernel void @atomic_store_i64(i64 %in, i64* %out) {
entry:
  store atomic i64 %in, i64* %out seq_cst, align 8
  ret void
}
873
; GCN-LABEL: {{^}}atomic_store_i64_addr64_offset:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; Register index + constant offset; atomic store lowers to a plain flat store.
define amdgpu_kernel void @atomic_store_i64_addr64_offset(i64 %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  store atomic i64 %in, i64* %gep seq_cst, align 8
  ret void
}
883
; GCN-LABEL: {{^}}atomic_store_i64_addr64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; Register index only; atomic store lowers to a plain flat store.
define amdgpu_kernel void @atomic_store_i64_addr64(i64 %in, i64* %out, i64 %index) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  store atomic i64 %in, i64* %ptr seq_cst, align 8
  ret void
}
892
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_offset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; Dead result + constant offset: cmpswap selected without glc.
define amdgpu_kernel void @atomic_cmpxchg_i64_offset(i64* %out, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
901
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_soffset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+}}:{{[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; Large constant offset (9000 x 8 bytes) that cannot fold into the instruction.
define amdgpu_kernel void @atomic_cmpxchg_i64_soffset(i64* %out, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 9000
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
910
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_offset:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]{{:[0-9]+}}], v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
; Used result: cmpswap must carry glc; the loaded (old) value is stored to %out2.
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_offset(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
  %gep = getelementptr i64, i64* %out, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}
922
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64_offset:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+\:[0-9]+}}], v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; Register index + constant offset; result unused, so no glc.
define amdgpu_kernel void @atomic_cmpxchg_i64_addr64_offset(i64* %out, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
932
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64_offset:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
; Register index + offset, used result: glc required; old value stored to %out2.
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64_offset(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %gep = getelementptr i64, i64* %ptr, i64 4
  %val = cmpxchg volatile i64* %gep, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}
945
; GCN-LABEL: {{^}}atomic_cmpxchg_i64:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
; Plain pointer, dead result: no glc.
define amdgpu_kernel void @atomic_cmpxchg_i64(i64* %out, i64 %in, i64 %old) {
entry:
  %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
953
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
; Used result: glc required; the loaded (old) value is stored to %out2.
define amdgpu_kernel void @atomic_cmpxchg_i64_ret(i64* %out, i64* %out2, i64 %in, i64 %old) {
entry:
  %val = cmpxchg volatile i64* %out, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}
964
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_addr64:
; GCN: flat_atomic_cmpswap_x2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}]{{$}}
; Register index only; result unused, so no glc.
define amdgpu_kernel void @atomic_cmpxchg_i64_addr64(i64* %out, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst
  ret void
}
973
; GCN-LABEL: {{^}}atomic_cmpxchg_i64_ret_addr64:
; GCN: flat_atomic_cmpswap_x2 v{{\[}}[[RET:[0-9]+]]:{{[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v{{\[}}[[RET]]:
; Register index, used result: glc required; old value stored to %out2.
define amdgpu_kernel void @atomic_cmpxchg_i64_ret_addr64(i64* %out, i64* %out2, i64 %in, i64 %index, i64 %old) {
entry:
  %ptr = getelementptr i64, i64* %out, i64 %index
  %val = cmpxchg volatile i64* %ptr, i64 %old, i64 %in seq_cst seq_cst
  %extract0 = extractvalue { i64, i1 } %val, 0
  store i64 %extract0, i64* %out2
  ret void
}
985
; GCN-LABEL: {{^}}atomic_load_f64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; f64 uses the same dwordx2 lowering as i64; seq_cst load requires glc.
define amdgpu_kernel void @atomic_load_f64_offset(double* %in, double* %out) {
entry:
  %gep = getelementptr double, double* %in, i64 4
  %val = load atomic double, double* %gep seq_cst, align 8
  store double %val, double* %out
  ret void
}
996
; GCN-LABEL: {{^}}atomic_load_f64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+}}:{{[0-9]+}}] glc
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; seq_cst f64 atomic load with no offset: still requires glc.
define amdgpu_kernel void @atomic_load_f64(double* %in, double* %out) {
entry:
  %val = load atomic double, double* %in seq_cst, align 8
  store double %val, double* %out
  ret void
}
1006
; GCN-LABEL: {{^}}atomic_load_f64_addr64_offset:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Register index + constant offset; seq_cst f64 load requires glc.
define amdgpu_kernel void @atomic_load_f64_addr64_offset(double* %in, double* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double* %in, i64 %index
  %gep = getelementptr double, double* %ptr, i64 4
  %val = load atomic double, double* %gep seq_cst, align 8
  store double %val, double* %out
  ret void
}
1018
; GCN-LABEL: {{^}}atomic_load_f64_addr64:
; GCN: flat_load_dwordx2 [[RET:v\[[0-9]+:[0-9]+\]]], v[{{[0-9]+:[0-9]+}}] glc{{$}}
; GCN: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, [[RET]]
; Register index only; seq_cst f64 load requires glc.
define amdgpu_kernel void @atomic_load_f64_addr64(double* %in, double* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double* %in, i64 %index
  %val = load atomic double, double* %ptr seq_cst, align 8
  store double %val, double* %out
  ret void
}
1029
; GCN-LABEL: {{^}}atomic_store_f64_offset:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}]{{$}}
; seq_cst f64 atomic store is a plain flat store (no glc on the instruction).
define amdgpu_kernel void @atomic_store_f64_offset(double %in, double* %out) {
entry:
  %gep = getelementptr double, double* %out, i64 4
  store atomic double %in, double* %gep seq_cst, align 8
  ret void
}
1038
; GCN-LABEL: {{^}}atomic_store_f64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+}}:{{[0-9]+}}]
; seq_cst f64 atomic store with no offset: plain flat store.
define amdgpu_kernel void @atomic_store_f64(double %in, double* %out) {
entry:
  store atomic double %in, double* %out seq_cst, align 8
  ret void
}
1046
; GCN-LABEL: {{^}}atomic_store_f64_addr64_offset:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; Register index + constant offset; atomic f64 store lowers to a plain flat store.
define amdgpu_kernel void @atomic_store_f64_addr64_offset(double %in, double* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double* %out, i64 %index
  %gep = getelementptr double, double* %ptr, i64 4
  store atomic double %in, double* %gep seq_cst, align 8
  ret void
}
1056
; GCN-LABEL: {{^}}atomic_store_f64_addr64:
; GCN: flat_store_dwordx2 {{v\[[0-9]+:[0-9]+\]}}, v[{{[0-9]+:[0-9]+}}]{{$}}
; Register index only; atomic f64 store lowers to a plain flat store.
define amdgpu_kernel void @atomic_store_f64_addr64(double %in, double* %out, i64 %index) {
entry:
  %ptr = getelementptr double, double* %out, i64 %index
  store atomic double %in, double* %ptr seq_cst, align 8
  ret void
}
1065