// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa \
// RUN: | opt -instnamer -S | FileCheck %s

// Also test serialization of atomic operations here, to avoid duplicating the test.
// RUN: %clang_cc1 %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa
// RUN: %clang_cc1 %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa \
// RUN: -emit-llvm -o - | opt -instnamer -S | FileCheck %s

// Overview: this file verifies the LLVM IR that clang emits for the
// __opencl_atomic_* builtins when compiling OpenCL 2.0 for the
// amdgcn-amd-amdhsa triple at -O0. The invocations above compile it twice:
// once directly, and once through a precompiled header that is then included
// back into the same file. The guard below keeps the definitions from being
// seen a second time in the precompiled-header configuration.
//
// The expected-IR patterns live in the comments next to the statements they
// describe; those comment lines are directives consumed by FileCheck and must
// not be reworded.
#ifndef ALREADY_INCLUDED
#define ALREADY_INCLUDED

#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable

// Local typedefs; the test does not include any header.
typedef __INTPTR_TYPE__ intptr_t;
typedef int int8 __attribute__((ext_vector_type(8)));

// Memory orderings, defined directly in terms of the compiler's predefined
// __ATOMIC_* macros.
typedef enum memory_order {
  memory_order_relaxed = __ATOMIC_RELAXED,
  memory_order_acquire = __ATOMIC_ACQUIRE,
  memory_order_release = __ATOMIC_RELEASE,
  memory_order_acq_rel = __ATOMIC_ACQ_REL,
  memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;

// Memory scopes, defined in terms of the compiler's predefined
// __OPENCL_MEMORY_SCOPE_* macros. The sub-group enumerator is only declared
// when one of the subgroup extension macros is defined.
typedef enum memory_scope {
  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;

// Program-scope atomic; initialised by atomic_init_foo() below.
atomic_int j;

// Sequentially consistent loads with each compile-time-constant scope.
// Per the patterns, the expected IR sync scopes are: work_group ->
// "workgroup", device -> "agent", all_svm_devices -> no syncscope clause,
// sub_group -> "wavefront".
void fi1(atomic_int *i) {
  // CHECK-LABEL: @fi1
  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst, align 4
  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);

  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst, align 4
  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);

  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst, align 4
  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
}

// Sequentially consistent store at work-group scope.
void fi2(atomic_int *i) {
  // CHECK-LABEL: @fi2
  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
}

// The same store through pointers qualified global, private and local.
// The patterns expect address spaces 1, 5 and 3 respectively on the pointer
// operand.
void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *il) {
  // CHECK-LABEL: @test_addr
  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  __opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  __opencl_atomic_store(il, 1, memory_order_seq_cst, memory_scope_work_group);
}

// Read-modify-write builtins. fetch_min/fetch_max are exercised on both a
// signed and an unsigned atomic: the patterns expect min/max for atomic_int
// and umin/umax for atomic_uint.
void fi3(atomic_int *i, atomic_uint *ui) {
  // CHECK-LABEL: @fi3
  // CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
}

// Strong compare-exchange with acquire ordering for both success and failure.
// The patterns expect the { i32, i1 } result to be unpacked, a conditional
// branch on the i1 element, and a store of the i32 element. Note that with a
// non-seq_cst ordering the expected scope string carries the "-one-as"
// suffix, unlike the seq_cst cases above.
bool fi4(atomic_int *i) {
  // CHECK-LABEL: @fi4(
  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire, align 4
  // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
  // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
  // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
  // CHECK: store i32 [[OLD]]
  int cmp = 0;
  return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group);
}

// Load whose scope is only known at run time. The patterns expect a switch
// on the scope value with explicit cases 1, 2 and 4 and a default
// destination; going by the capture names, these are the work-group, device
// and sub-group paths, with all-SVM-devices as the default. Each destination
// performs the load with the matching sync scope and branches to a common
// continuation block.
void fi5(atomic_int *i, int scope) {
  // CHECK-LABEL: @fi5
  // CHECK: switch i32 %{{.*}}, label %[[opencl_allsvmdevices:.*]] [
  // CHECK-NEXT: i32 1, label %[[opencl_workgroup:.*]]
  // CHECK-NEXT: i32 2, label %[[opencl_device:.*]]
  // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[opencl_workgroup]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst, align 4
  // CHECK: br label %[[continue:.*]]
  // CHECK: [[opencl_device]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst, align 4
  // CHECK: br label %[[continue]]
  // CHECK: [[opencl_allsvmdevices]]:
  // CHECK: load atomic i32, i32* %{{.*}} seq_cst, align 4
  // CHECK: br label %[[continue]]
  // CHECK: [[opencl_subgroup]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst, align 4
  // CHECK: br label %[[continue]]
  // CHECK: [[continue]]:
  int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
}

// Load whose ordering and scope are both only known at run time. The
// patterns expect an outer switch on the ordering (cases 1 and 2 sharing the
// acquire destination, case 5 for seq_cst, monotonic as the default), each
// leading to an inner switch on the scope shaped like the one in fi5. The
// monotonic and acquire loads are expected with "-one-as" scope strings; the
// seq_cst loads are expected without that suffix.
void fi6(atomic_int *i, int order, int scope) {
  // CHECK-LABEL: @fi6
  // CHECK: switch i32 %{{.*}}, label %[[monotonic:.*]] [
  // CHECK-NEXT: i32 1, label %[[acquire:.*]]
  // CHECK-NEXT: i32 2, label %[[acquire:.*]]
  // CHECK-NEXT: i32 5, label %[[seqcst:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[monotonic]]:
  // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [
  // CHECK-NEXT: i32 1, label %[[MON_WG:.*]]
  // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]]
  // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[acquire]]:
  // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [
  // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]]
  // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]]
  // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[seqcst]]:
  // CHECK: switch i32 %{{.*}}, label %[[SEQ_ALL:.*]] [
  // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]]
  // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]]
  // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[MON_WG]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic, align 4
  // CHECK: [[MON_DEV]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic, align 4
  // CHECK: [[MON_ALL]]:
  // CHECK: load atomic i32, i32* %{{.*}} monotonic, align 4
  // CHECK: [[MON_SUB]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic, align 4
  // CHECK: [[ACQ_WG]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire, align 4
  // CHECK: [[ACQ_DEV]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire, align 4
  // CHECK: [[ACQ_ALL]]:
  // CHECK: load atomic i32, i32* %{{.*}} acquire, align 4
  // CHECK: [[ACQ_SUB]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire, align 4
  // CHECK: [[SEQ_WG]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst, align 4
  // CHECK: [[SEQ_DEV]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst, align 4
  // CHECK: [[SEQ_ALL]]:
  // CHECK: load atomic i32, i32* %{{.*}} seq_cst, align 4
  // CHECK: [[SEQ_SUB]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst, align 4
  int x = __opencl_atomic_load(i, order, scope);
}

// Relaxed load of an atomic_float in the global address space. The pattern
// expects the access to be emitted as an i32 atomic load.
float ff1(global atomic_float *d) {
  // CHECK-LABEL: @ff1
  // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic, align 4
  return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
}

// Release store to an atomic_float; expected as an i32 atomic store.
void ff2(atomic_float *d) {
  // CHECK-LABEL: @ff2
  // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release, align 4
  __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
}

// Sequentially consistent exchange on an atomic_float; expected as an i32
// xchg.
float ff3(atomic_float *d) {
  // CHECK-LABEL: @ff3
  // CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst, align 4
  return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
}

// Relaxed fetch_add on a global atomic_float; expected as a float fadd
// (no integer round trip, unlike the load/store/exchange cases above).
float ff4(global atomic_float *d, float a) {
  // CHECK-LABEL: @ff4
  // CHECK: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
  return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}

// Relaxed fetch_add on a global atomic_double; expected as a double fadd.
// The function is declared to return float even though the atomic is double.
float ff5(global atomic_double *d, double a) {
  // CHECK-LABEL: @ff5
  // CHECK: atomicrmw fadd double addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
  return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}

// Initialising the program-scope atomic must not produce any atomic
// instruction: the patterns require a plain store and forbid the word
// "atomic" both before it and up to the closing brace of the function.
// CHECK-LABEL: @atomic_init_foo
void atomic_init_foo()
{
  // CHECK-NOT: atomic
  // CHECK: store
  __opencl_atomic_init(&j, 42);

  // CHECK-NOT: atomic
  // CHECK: }
}

// Compare-exchange with compile-time-constant success/failure orderings:
// a strong acquire/relaxed exchange and a weak seq_cst/acquire exchange.
// CHECK-LABEL: @failureOrder
void failureOrder(atomic_int *ptr, int *ptr2) {
  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic, align 4
  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);

  // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire, align 4
  __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);
}

// Compare-exchange whose success and failure orderings are both run-time
// values. The patterns expect an outer switch on one ordering value that
// selects a per-ordering block, an inner switch in each of those blocks that
// selects the second ordering, and finally one cmpxchg per expected ordering
// pair, each followed by a branch. Going by the capture names and the order
// of the orderings on the expected cmpxchg lines, the outer switch is on the
// success ordering and the inner ones on the failure ordering.
// CHECK-LABEL: @generalFailureOrder
void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) {
  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group);
  // Outer dispatch.
  // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
  // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]

  // Inner dispatch, one switch per outer destination.
  // CHECK: [[MONOTONIC]]
  // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: ]

  // CHECK: [[ACQUIRE]]
  // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: ]

  // CHECK: [[RELEASE]]
  // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 2, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: ]

  // CHECK: [[ACQREL]]
  // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: ]

  // CHECK: [[SEQCST]]
  // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: ]

  // One cmpxchg per ordering pair.
  // CHECK: [[MONOTONIC_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} monotonic monotonic, align 4
  // CHECK: br

  // CHECK: [[ACQUIRE_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} acquire monotonic, align 4
  // CHECK: br

  // CHECK: [[ACQUIRE_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} acquire acquire, align 4
  // CHECK: br

  // CHECK: [[RELEASE_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} release monotonic, align 4
  // CHECK: br

  // CHECK: [[RELEASE_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} release acquire, align 4
  // CHECK: br

  // CHECK: [[ACQREL_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} acq_rel monotonic, align 4
  // CHECK: br

  // CHECK: [[ACQREL_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} acq_rel acquire, align 4
  // CHECK: br

  // CHECK: [[SEQCST_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} seq_cst monotonic, align 4
  // CHECK: br

  // CHECK: [[SEQCST_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} seq_cst acquire, align 4
  // CHECK: br

  // CHECK: [[SEQCST_SEQCST]]
  // CHECK: cmpxchg {{.*}} seq_cst seq_cst, align 4
  // CHECK: br
}

// Load through a pointer to a volatile atomic. The patterns require the
// "volatile" keyword on the emitted atomic load, and, after the first
// alloca, every expected instruction must appear on the line immediately
// following the previous match, so the whole function body is pinned down.
int test_volatile(volatile atomic_int *i) {
  // CHECK-LABEL: @test_volatile
  // CHECK: %[[i_addr:.*]] = alloca i32
  // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
  // CHECK-NEXT: store i32* %i, i32* addrspace(5)* %[[i_addr]]
  // CHECK-NEXT: %[[addr:.*]] = load i32*, i32* addrspace(5)* %[[i_addr]]
  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst, align 4
  // CHECK-NEXT: store i32 %[[res]], i32 addrspace(5)* %[[atomicdst]]
  // CHECK-NEXT: %[[retval:.*]] = load i32, i32 addrspace(5)* %[[atomicdst]]
  // CHECK-NEXT: ret i32 %[[retval]]
  return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
}

#endif