1 // RUN: %clang_cc1 -no-opaque-pointers -x hip -std=c++11 -triple amdgcn -fcuda-is-device -emit-llvm %s -o - | FileCheck %s
2 #include "Inputs/cuda.h"
3
4 // CHECK-LABEL: @_Z24atomic32_op_singlethreadPiii
5 // CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
6 // CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 4
7 // CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
8 // CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
9 // CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
10 // CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
11 // CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
12 // CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
13 // CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
14 // CHECK: load atomic i32, i32* {{%[0-9]+}} syncscope("singlethread-one-as") monotonic, align 4
15 // CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("singlethread-one-as") monotonic, align 4
atomic32_op_singlethread(int * ptr,int val,int desired)16 __device__ int atomic32_op_singlethread(int *ptr, int val, int desired) {
17 bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
18 flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
19 val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
20 val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
21 val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
22 val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
23 val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
24 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
25 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
26 val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
27 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
28 return flag ? val : desired;
29 }
30
31 // CHECK-LABEL: @_Z25atomicu32_op_singlethreadPjjj
32 // CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
33 // CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("singlethread-one-as")
atomicu32_op_singlethread(unsigned int * ptr,unsigned int val,unsigned int desired)34 __device__ unsigned int atomicu32_op_singlethread(unsigned int *ptr, unsigned int val, unsigned int desired) {
35 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
36 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
37 return val;
38 }
39
40 // CHECK-LABEL: @_Z21atomic32_op_wavefrontPiii
41 // CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
42 // CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 4
43 // CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
44 // CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
45 // CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
46 // CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
47 // CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
48 // CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
49 // CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
50 // CHECK: load atomic i32, i32* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 4
51 // CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("wavefront-one-as") monotonic, align 4
atomic32_op_wavefront(int * ptr,int val,int desired)52 __device__ int atomic32_op_wavefront(int *ptr, int val, int desired) {
53 bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
54 flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
55 val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
56 val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
57 val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
58 val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
59 val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
60 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
61 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
62 val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
63 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
64 return flag ? val : desired;
65 }
66
67 // CHECK-LABEL: @_Z22atomicu32_op_wavefrontPjjj
68 // CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
69 // CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("wavefront-one-as")
atomicu32_op_wavefront(unsigned int * ptr,unsigned int val,unsigned int desired)70 __device__ unsigned int atomicu32_op_wavefront(unsigned int *ptr, unsigned int val, unsigned int desired) {
71 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
72 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
73 return val;
74 }
75
76 // CHECK-LABEL: @_Z21atomic32_op_workgroupPiii
77 // CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
78 // CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 4
79 // CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
80 // CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
81 // CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
82 // CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
83 // CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
84 // CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
85 // CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
86 // CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("workgroup-one-as") monotonic, align 4
atomic32_op_workgroup(int * ptr,int val,int desired)87 __device__ int atomic32_op_workgroup(int *ptr, int val, int desired) {
88 bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
89 flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
90 val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
91 val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
92 val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
93 val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
94 val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
95 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
96 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
97 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
98 return flag ? val : desired;
99 }
100
101 // CHECK-LABEL: @_Z22atomicu32_op_workgroupPjjj
102 // CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
103 // CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("workgroup-one-as")
atomicu32_op_workgroup(unsigned int * ptr,unsigned int val,unsigned int desired)104 __device__ unsigned int atomicu32_op_workgroup(unsigned int *ptr, unsigned int val, unsigned int desired) {
105 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
106 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
107 return val;
108 }
109
110 // CHECK-LABEL: @_Z17atomic32_op_agentPiii
111 // CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
112 // CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 4
113 // CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
114 // CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
115 // CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
116 // CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
117 // CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
118 // CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
119 // CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
120 // CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("agent-one-as") monotonic, align 4
atomic32_op_agent(int * ptr,int val,int desired)121 __device__ int atomic32_op_agent(int *ptr, int val, int desired) {
122 bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
123 flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
124 val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
125 val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
126 val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
127 val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
128 val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
129 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
130 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
131 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
132 return flag ? val : desired;
133 }
134
135 // CHECK-LABEL: @_Z18atomicu32_op_agentPjjj
136 // CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
137 // CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("agent-one-as")
atomicu32_op_agent(unsigned int * ptr,unsigned int val,unsigned int desired)138 __device__ unsigned int atomicu32_op_agent(unsigned int *ptr, unsigned int val, unsigned int desired) {
139 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
140 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
141 return val;
142 }
143
144 // CHECK-LABEL: @_Z18atomic32_op_systemPiii
145 // CHECK: cmpxchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
146 // CHECK: cmpxchg weak i32* {{%[0-9]+}}, i32 {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 4
147 // CHECK: atomicrmw xchg i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
148 // CHECK: atomicrmw add i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
149 // CHECK: atomicrmw and i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
150 // CHECK: atomicrmw or i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
151 // CHECK: atomicrmw xor i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
152 // CHECK: atomicrmw min i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
153 // CHECK: atomicrmw max i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
154 // CHECK: load i32, i32* %{{.*}}, align 4
155 // CHECK: store atomic i32 %{{.*}}, i32* %{{.*}} syncscope("one-as") monotonic, align 4
atomic32_op_system(int * ptr,int val,int desired)156 __device__ int atomic32_op_system(int *ptr, int val, int desired) {
157 bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
158 flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
159 val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
160 val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
161 val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
162 val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
163 val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
164 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
165 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
166 val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
167 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
168 return flag ? val : desired;
169 }
170
171 // CHECK-LABEL: @_Z19atomicu32_op_systemPjjj
172 // CHECK: atomicrmw umin i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
173 // CHECK: atomicrmw umax i32* {{%[0-9]+}}, i32 {{%[0-9]+}} syncscope("one-as")
atomicu32_op_system(unsigned int * ptr,unsigned int val,unsigned int desired)174 __device__ unsigned int atomicu32_op_system(unsigned int *ptr, unsigned int val, unsigned int desired) {
175 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
176 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
177 return val;
178 }
179
180 // CHECK-LABEL: @_Z24atomic64_op_singlethreadPxS_xx
181 // CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
182 // CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as") monotonic monotonic, align 8
183 // CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
184 // CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
185 // CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
186 // CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
187 // CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
188 // CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
189 // CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
190 // CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
atomic64_op_singlethread(long long * ptr,long long * ptr2,long long val,long long desired)191 __device__ long long atomic64_op_singlethread(long long *ptr, long long *ptr2, long long val, long long desired) {
192 bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
193 flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
194 val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
195 val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
196 val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
197 val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
198 val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
199 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
200 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
201 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
202 return flag ? val : desired;
203 }
204
205 // CHECK-LABEL: @_Z25atomicu64_op_singlethreadPyS_yy
206 // CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
207 // CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("singlethread-one-as")
208 // CHECK: load atomic i64, i64* %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
209 // CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("singlethread-one-as") monotonic, align 8
atomicu64_op_singlethread(unsigned long long * ptr,unsigned long long * ptr2,unsigned long long val,unsigned long long desired)210 __device__ unsigned long long atomicu64_op_singlethread(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
211 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
212 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
213 val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
214 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SINGLETHREAD);
215 return val;
216 }
217
218 // CHECK-LABEL: @_Z21atomic64_op_wavefrontPxS_xx
219 // CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
220 // CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as") monotonic monotonic, align 8
221 // CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
222 // CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
223 // CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
224 // CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
225 // CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
226 // CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
227 // CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
228 // CHECK: load atomic i64, i64* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8
229 // CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("wavefront-one-as") monotonic, align 8
atomic64_op_wavefront(long long * ptr,long long * ptr2,long long val,long long desired)230 __device__ long long atomic64_op_wavefront(long long *ptr, long long *ptr2, long long val, long long desired) {
231 bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
232 flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
233 val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
234 val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
235 val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
236 val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
237 val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
238 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
239 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
240 val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
241 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
242 return flag ? val : desired;
243 }
244
245 // CHECK-LABEL: @_Z22atomicu64_op_wavefrontPyS_yy
246 // CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
247 // CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("wavefront-one-as")
248 // CHECK: load atomic i64, i64* {{%[0-9]+}} syncscope("wavefront-one-as") monotonic, align 8
249 // CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("wavefront-one-as") monotonic, align 8
atomicu64_op_wavefront(unsigned long long * ptr,unsigned long long * ptr2,unsigned long long val,unsigned long long desired)250 __device__ unsigned long long atomicu64_op_wavefront(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
251 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
252 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
253 val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
254 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WAVEFRONT);
255 return val;
256 }
257
258 // CHECK-LABEL: @_Z21atomic64_op_workgroupPxS_xx
259 // CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
260 // CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as") monotonic monotonic, align 8
261 // CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
262 // CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
263 // CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
264 // CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
265 // CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
266 // CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
267 // CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
268 // CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("workgroup-one-as") monotonic, align 8
atomic64_op_workgroup(long long * ptr,long long * ptr2,long long val,long long desired)269 __device__ long long atomic64_op_workgroup(long long *ptr, long long *ptr2, long long val, long long desired) {
270 bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
271 flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
272 val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
273 val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
274 val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
275 val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
276 val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
277 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
278 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
279 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
280 return flag ? val : desired;
281 }
282
283 // CHECK-LABEL: @_Z22atomicu64_op_workgroupPyS_yy
284 // CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
285 // CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("workgroup-one-as")
286 // CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("workgroup-one-as") monotonic, align 8
atomicu64_op_workgroup(unsigned long long * ptr,unsigned long long * ptr2,unsigned long long val,unsigned long long desired)287 __device__ unsigned long long atomicu64_op_workgroup(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
288 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
289 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
290 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_WORKGROUP);
291 return val;
292 }
293
294 // CHECK-LABEL: @_Z17atomic64_op_agentPxS_xx
295 // CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
296 // CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as") monotonic monotonic, align 8
297 // CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
298 // CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
299 // CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
300 // CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
301 // CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
302 // CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
303 // CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
304 // CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("agent-one-as") monotonic, align 8
atomic64_op_agent(long long * ptr,long long * ptr2,long long val,long long desired)305 __device__ long long atomic64_op_agent(long long *ptr, long long *ptr2, long long val, long long desired) {
306 bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
307 flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
308 val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
309 val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
310 val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
311 val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
312 val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
313 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
314 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
315 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
316 return flag ? val : desired;
317 }
318
319 // CHECK-LABEL: @_Z18atomicu64_op_agentPyS_yy
320 // CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
321 // CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("agent-one-as")
322 // CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("agent-one-as") monotonic, align 8
atomicu64_op_agent(unsigned long long * ptr,unsigned long long * ptr2,unsigned long long val,unsigned long long desired)323 __device__ unsigned long long atomicu64_op_agent(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
324 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
325 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
326 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_AGENT);
327 return val;
328 }
329
330 // CHECK-LABEL: @_Z18atomic64_op_systemPxS_xx
331 // CHECK: cmpxchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
332 // CHECK: cmpxchg weak i64* {{%[0-9]+}}, i64 {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as") monotonic monotonic, align 8
333 // CHECK: atomicrmw xchg i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
334 // CHECK: atomicrmw add i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
335 // CHECK: atomicrmw and i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
336 // CHECK: atomicrmw or i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
337 // CHECK: atomicrmw xor i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
338 // CHECK: atomicrmw min i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
339 // CHECK: atomicrmw max i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
340 // CHECK: load i64, i64* %{{.*}}, align 8
341 // CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("one-as") monotonic, align 8
atomic64_op_system(long long * ptr,long long * ptr2,long long val,long long desired)342 __device__ long long atomic64_op_system(long long *ptr, long long *ptr2, long long val, long long desired) {
343 bool flag = __hip_atomic_compare_exchange_strong(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
344 flag = __hip_atomic_compare_exchange_weak(ptr, &val, desired, __ATOMIC_RELAXED, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
345 val = __hip_atomic_exchange(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
346 val = __hip_atomic_fetch_add(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
347 val = __hip_atomic_fetch_and(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
348 val = __hip_atomic_fetch_or(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
349 val = __hip_atomic_fetch_xor(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
350 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
351 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
352 val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
353 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
354 return flag ? val : desired;
355 }
356
357 // CHECK-LABEL: @_Z19atomicu64_op_systemPyS_yy
358 // CHECK: atomicrmw umin i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
359 // CHECK: atomicrmw umax i64* {{%[0-9]+}}, i64 {{%[0-9]+}} syncscope("one-as")
360 // CHECK: load i64, i64* %{{.*}}, align 8
361 // CHECK: store atomic i64 %{{.*}}, i64* %{{.*}} syncscope("one-as") monotonic, align 8
atomicu64_op_system(unsigned long long * ptr,unsigned long long * ptr2,unsigned long long val,unsigned long long desired)362 __device__ unsigned long long atomicu64_op_system(unsigned long long *ptr, unsigned long long *ptr2, unsigned long long val, unsigned long long desired) {
363 val = __hip_atomic_fetch_min(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
364 val = __hip_atomic_fetch_max(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
365 val = __hip_atomic_load(ptr, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
366 __hip_atomic_store(ptr, val, __ATOMIC_RELAXED, __HIP_MEMORY_SCOPE_SYSTEM);
367 return val;
368 }
369