// RUN: %clang_cc1 -no-opaque-pointers %s -cl-std=CL2.0 -emit-llvm -O0 -o - -triple=amdgcn-amd-amdhsa \
// RUN:   | opt -instnamer -S | FileCheck %s

// Also test serialization of atomic operations here, to avoid duplicating the test.
// RUN: %clang_cc1 -no-opaque-pointers %s -cl-std=CL2.0 -emit-pch -O0 -o %t -triple=amdgcn-amd-amdhsa
// RUN: %clang_cc1 -no-opaque-pointers %s -cl-std=CL2.0 -include-pch %t -O0 -triple=amdgcn-amd-amdhsa \
// RUN:   -emit-llvm -o - | opt -instnamer -S | FileCheck %s

// Code-generation test for the __opencl_atomic_* builtins. Each function
// below issues one or more atomic operations; the FileCheck directives inside
// it pin the LLVM IR instruction, memory ordering and syncscope string that
// must be emitted. The same directives are verified twice: once compiling the
// file directly and once through a precompiled header built from this file.

// The guard keeps the definitions from being seen twice when this file is
// compiled with a PCH that was generated from itself.
#ifndef ALREADY_INCLUDED
#define ALREADY_INCLUDED

#pragma OPENCL EXTENSION cl_khr_int64_base_atomics : enable
#pragma OPENCL EXTENSION cl_khr_int64_extended_atomics : enable

typedef __INTPTR_TYPE__ intptr_t;
typedef int int8 __attribute__((ext_vector_type(8)));

// Memory orderings, expressed with the compiler's predefined __ATOMIC_*
// macros so no OpenCL header is needed.
typedef enum memory_order {
  memory_order_relaxed = __ATOMIC_RELAXED,
  memory_order_acquire = __ATOMIC_ACQUIRE,
  memory_order_release = __ATOMIC_RELEASE,
  memory_order_acq_rel = __ATOMIC_ACQ_REL,
  memory_order_seq_cst = __ATOMIC_SEQ_CST
} memory_order;

// Memory scopes, expressed with the compiler's predefined
// __OPENCL_MEMORY_SCOPE_* macros. The sub-group scope only exists when one of
// the sub-group extensions is available.
typedef enum memory_scope {
  memory_scope_work_item = __OPENCL_MEMORY_SCOPE_WORK_ITEM,
  memory_scope_work_group = __OPENCL_MEMORY_SCOPE_WORK_GROUP,
  memory_scope_device = __OPENCL_MEMORY_SCOPE_DEVICE,
  memory_scope_all_svm_devices = __OPENCL_MEMORY_SCOPE_ALL_SVM_DEVICES,
#if defined(cl_intel_subgroups) || defined(cl_khr_subgroups)
  memory_scope_sub_group = __OPENCL_MEMORY_SCOPE_SUB_GROUP
#endif
} memory_scope;

// Program-scope atomic used by atomic_init_foo().
atomic_int j;

// seq_cst atomic loads at every constant memory scope; each scope must map to
// the syncscope string shown in the directive preceding the call (no
// syncscope at all for all_svm_devices).
void fi1(atomic_int *i) {
  // CHECK-LABEL: @fi1
  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  int x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("agent") seq_cst, align 4
  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_device);

  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} seq_cst, align 4
  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_all_svm_devices);

  // CHECK: load atomic i32, i32* %{{[.0-9A-Z_a-z]+}} syncscope("wavefront") seq_cst, align 4
  x = __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_sub_group);
}

// seq_cst atomic store at work-group scope through an unqualified pointer.
void fi2(atomic_int *i) {
  // CHECK-LABEL: @fi2
  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  __opencl_atomic_store(i, 1, memory_order_seq_cst, memory_scope_work_group);
}

// The same atomic store through global, private and local pointers; the
// directives expect address spaces 1, 5 and 3 respectively on the IR pointer.
void test_addr(global atomic_int *ig, private atomic_int *ip, local atomic_int *il) {
  // CHECK-LABEL: @test_addr
  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(1)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  __opencl_atomic_store(ig, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(5)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  __opencl_atomic_store(ip, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: store atomic i32 %{{[.0-9A-Z_a-z]+}}, i32 addrspace(3)* %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  __opencl_atomic_store(il, 1, memory_order_seq_cst, memory_scope_work_group);
}

// Read-modify-write builtins. fetch_min/fetch_max must select the signed
// (min/max) or unsigned (umin/umax) atomicrmw operation according to the
// signedness of the atomic operand.
void fi3(atomic_int *i, atomic_uint *ui) {
  // CHECK-LABEL: @fi3
  // CHECK: atomicrmw and i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  int x = __opencl_atomic_fetch_and(i, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: atomicrmw min i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  x = __opencl_atomic_fetch_min(i, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: atomicrmw max i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  x = __opencl_atomic_fetch_max(i, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: atomicrmw umin i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  x = __opencl_atomic_fetch_min(ui, 1, memory_order_seq_cst, memory_scope_work_group);

  // CHECK: atomicrmw umax i32* %{{[.0-9A-Z_a-z]+}}, i32 %{{[.0-9A-Z_a-z]+}} syncscope("workgroup") seq_cst, align 4
  x = __opencl_atomic_fetch_max(ui, 1, memory_order_seq_cst, memory_scope_work_group);
}

// Strong compare-exchange with acquire/acquire orderings. The directives
// follow the cmpxchg result pair: element 0 is the old value, element 1 the
// success flag that feeds the branch, and the old value is then stored.
bool fi4(atomic_int *i) {
  // CHECK-LABEL: @fi4(
  // CHECK: [[PAIR:%[.0-9A-Z_a-z]+]] = cmpxchg i32* [[PTR:%[.0-9A-Z_a-z]+]], i32 [[EXPECTED:%[.0-9A-Z_a-z]+]], i32 [[DESIRED:%[.0-9A-Z_a-z]+]] syncscope("workgroup-one-as") acquire acquire, align 4
  // CHECK: [[OLD:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 0
  // CHECK: [[CMP:%[.0-9A-Z_a-z]+]] = extractvalue { i32, i1 } [[PAIR]], 1
  // CHECK: br i1 [[CMP]], label %[[STORE_EXPECTED:[.0-9A-Z_a-z]+]], label %[[CONTINUE:[.0-9A-Z_a-z]+]]
  // CHECK: store i32 [[OLD]]
  int cmp = 0;
  return __opencl_atomic_compare_exchange_strong(i, &cmp, 1, memory_order_acquire, memory_order_acquire, memory_scope_work_group);
}

// Atomic load whose scope is only known at run time: a switch on the scope
// value must dispatch to one load per scope, all joining at a common
// continuation block. all_svm_devices is the switch default.
void fi5(atomic_int *i, int scope) {
  // CHECK-LABEL: @fi5
  // CHECK: switch i32 %{{.*}}, label %[[opencl_allsvmdevices:.*]] [
  // CHECK-NEXT: i32 1, label %[[opencl_workgroup:.*]]
  // CHECK-NEXT: i32 2, label %[[opencl_device:.*]]
  // CHECK-NEXT: i32 4, label %[[opencl_subgroup:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[opencl_workgroup]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst, align 4
  // CHECK: br label %[[continue:.*]]
  // CHECK: [[opencl_device]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst, align 4
  // CHECK: br label %[[continue]]
  // CHECK: [[opencl_allsvmdevices]]:
  // CHECK: load atomic i32, i32* %{{.*}} seq_cst, align 4
  // CHECK: br label %[[continue]]
  // CHECK: [[opencl_subgroup]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst, align 4
  // CHECK: br label %[[continue]]
  // CHECK: [[continue]]:
  int x = __opencl_atomic_load(i, memory_order_seq_cst, scope);
}

// Atomic load with both ordering and scope known only at run time: an outer
// switch on the ordering, then one scope switch per ordering. Order values 1
// and 2 must branch to the same acquire block, so the second case re-uses the
// captured label instead of re-defining it. Non-seq_cst orderings are expected
// to use the "-one-as" syncscope variants.
void fi6(atomic_int *i, int order, int scope) {
  // CHECK-LABEL: @fi6
  // CHECK: switch i32 %{{.*}}, label %[[monotonic:.*]] [
  // CHECK-NEXT: i32 1, label %[[acquire:.*]]
  // CHECK-NEXT: i32 2, label %[[acquire]]
  // CHECK-NEXT: i32 5, label %[[seqcst:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[monotonic]]:
  // CHECK: switch i32 %{{.*}}, label %[[MON_ALL:.*]] [
  // CHECK-NEXT: i32 1, label %[[MON_WG:.*]]
  // CHECK-NEXT: i32 2, label %[[MON_DEV:.*]]
  // CHECK-NEXT: i32 4, label %[[MON_SUB:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[acquire]]:
  // CHECK: switch i32 %{{.*}}, label %[[ACQ_ALL:.*]] [
  // CHECK-NEXT: i32 1, label %[[ACQ_WG:.*]]
  // CHECK-NEXT: i32 2, label %[[ACQ_DEV:.*]]
  // CHECK-NEXT: i32 4, label %[[ACQ_SUB:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[seqcst]]:
  // CHECK: switch i32 %{{.*}}, label %[[SEQ_ALL:.*]] [
  // CHECK-NEXT: i32 1, label %[[SEQ_WG:.*]]
  // CHECK-NEXT: i32 2, label %[[SEQ_DEV:.*]]
  // CHECK-NEXT: i32 4, label %[[SEQ_SUB:.*]]
  // CHECK-NEXT: ]
  // CHECK: [[MON_WG]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") monotonic, align 4
  // CHECK: [[MON_DEV]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") monotonic, align 4
  // CHECK: [[MON_ALL]]:
  // CHECK: load atomic i32, i32* %{{.*}} monotonic, align 4
  // CHECK: [[MON_SUB]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") monotonic, align 4
  // CHECK: [[ACQ_WG]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup-one-as") acquire, align 4
  // CHECK: [[ACQ_DEV]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent-one-as") acquire, align 4
  // CHECK: [[ACQ_ALL]]:
  // CHECK: load atomic i32, i32* %{{.*}} acquire, align 4
  // CHECK: [[ACQ_SUB]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront-one-as") acquire, align 4
  // CHECK: [[SEQ_WG]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("workgroup") seq_cst, align 4
  // CHECK: [[SEQ_DEV]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("agent") seq_cst, align 4
  // CHECK: [[SEQ_ALL]]:
  // CHECK: load atomic i32, i32* %{{.*}} seq_cst, align 4
  // CHECK: [[SEQ_SUB]]:
  // CHECK: load atomic i32, i32* %{{.*}} syncscope("wavefront") seq_cst, align 4
  int x = __opencl_atomic_load(i, order, scope);
}

// Relaxed load of an atomic_float in global memory; the directive expects it
// to be emitted as a monotonic i32 atomic load.
float ff1(global atomic_float *d) {
  // CHECK-LABEL: @ff1
  // CHECK: load atomic i32, i32 addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic, align 4
  return __opencl_atomic_load(d, memory_order_relaxed, memory_scope_work_group);
}

// Release store to an atomic_float; expected as an i32 atomic store.
void ff2(atomic_float *d) {
  // CHECK-LABEL: @ff2
  // CHECK: store atomic i32 {{.*}} syncscope("workgroup-one-as") release, align 4
  __opencl_atomic_store(d, 1, memory_order_release, memory_scope_work_group);
}

// seq_cst exchange on an atomic_float; expected as an i32 atomicrmw xchg.
float ff3(atomic_float *d) {
  // CHECK-LABEL: @ff3
  // CHECK: atomicrmw xchg i32* {{.*}} syncscope("workgroup") seq_cst, align 4
  return __opencl_atomic_exchange(d, 2, memory_order_seq_cst, memory_scope_work_group);
}

// Relaxed fetch_add on a global atomic_float; expected as a native float
// atomicrmw fadd.
float ff4(global atomic_float *d, float a) {
  // CHECK-LABEL: @ff4
  // CHECK: atomicrmw fadd float addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
  return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}

// Relaxed fetch_add on a global atomic_double; expected as a native double
// atomicrmw fadd. The declared return type is float, so the fetched value is
// converted on return.
float ff5(global atomic_double *d, double a) {
  // CHECK-LABEL: @ff5
  // CHECK: atomicrmw fadd double addrspace(1)* {{.*}} syncscope("workgroup-one-as") monotonic
  return __opencl_atomic_fetch_add(d, a, memory_order_relaxed, memory_scope_work_group);
}

// __opencl_atomic_init must be emitted as a plain store: no instruction
// containing "atomic" may appear anywhere in the function body.
// CHECK-LABEL: @atomic_init_foo
void atomic_init_foo()
{
  // CHECK-NOT: atomic
  // CHECK: store
  __opencl_atomic_init(&j, 42);

  // CHECK-NOT: atomic
  // CHECK: }
}

// Compare-exchange with constant success/failure orderings: both orderings
// must appear on the cmpxchg, the weak builtin must produce `cmpxchg weak`,
// and the syncscope must be the plain one only when seq_cst is involved.
// CHECK-LABEL: @failureOrder
void failureOrder(atomic_int *ptr, int *ptr2) {
  // CHECK: cmpxchg i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup-one-as") acquire monotonic, align 4
  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 43, memory_order_acquire, memory_order_relaxed, memory_scope_work_group);

  // CHECK: cmpxchg weak i32* {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z._]+}}, i32 {{%[0-9A-Za-z_.]+}} syncscope("workgroup") seq_cst acquire, align 4
  __opencl_atomic_compare_exchange_weak(ptr, ptr2, 43, memory_order_seq_cst, memory_order_acquire, memory_scope_work_group);
}

// Compare-exchange with run-time success and failure orderings: an outer
// switch on the success ordering, one inner switch on the failure ordering per
// success ordering, and one cmpxchg per (success, failure) pair. In every
// switch, order values 1 and 2 must share a destination block, so the second
// case re-uses the captured label.
// CHECK-LABEL: @generalFailureOrder
void generalFailureOrder(atomic_int *ptr, int *ptr2, int success, int fail) {
  __opencl_atomic_compare_exchange_strong(ptr, ptr2, 42, success, fail, memory_scope_work_group);
  // CHECK: switch i32 {{.*}}, label %[[MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[ACQUIRE]]
  // CHECK-NEXT: i32 3, label %[[RELEASE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 4, label %[[ACQREL:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 5, label %[[SEQCST:[0-9a-zA-Z._]+]]

  // CHECK: [[MONOTONIC]]
  // CHECK: switch {{.*}}, label %[[MONOTONIC_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[MONOTONIC_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[MONOTONIC_ACQUIRE]]
  // CHECK-NEXT: i32 5, label %[[MONOTONIC_SEQCST:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: ]

  // CHECK: [[ACQUIRE]]
  // CHECK: switch {{.*}}, label %[[ACQUIRE_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[ACQUIRE_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[ACQUIRE_ACQUIRE]]
  // CHECK-NEXT: i32 5, label %[[ACQUIRE_SEQCST:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: ]

  // CHECK: [[RELEASE]]
  // CHECK: switch {{.*}}, label %[[RELEASE_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[RELEASE_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[RELEASE_ACQUIRE]]
  // CHECK-NEXT: i32 5, label %[[RELEASE_SEQCST:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: ]

  // CHECK: [[ACQREL]]
  // CHECK: switch {{.*}}, label %[[ACQREL_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[ACQREL_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[ACQREL_ACQUIRE]]
  // CHECK-NEXT: i32 5, label %[[ACQREL_SEQCST:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: ]

  // CHECK: [[SEQCST]]
  // CHECK: switch {{.*}}, label %[[SEQCST_MONOTONIC:[0-9a-zA-Z._]+]] [
  // CHECK-NEXT: i32 1, label %[[SEQCST_ACQUIRE:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: i32 2, label %[[SEQCST_ACQUIRE]]
  // CHECK-NEXT: i32 5, label %[[SEQCST_SEQCST:[0-9a-zA-Z._]+]]
  // CHECK-NEXT: ]

  // CHECK: [[MONOTONIC_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} monotonic monotonic, align 4
  // CHECK: br

  // CHECK: [[MONOTONIC_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} monotonic acquire, align 4
  // CHECK: br

  // CHECK: [[MONOTONIC_SEQCST]]
  // CHECK: cmpxchg {{.*}} monotonic seq_cst, align 4
  // CHECK: br

  // CHECK: [[ACQUIRE_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} acquire monotonic, align 4
  // CHECK: br

  // CHECK: [[ACQUIRE_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} acquire acquire, align 4
  // CHECK: br

  // CHECK: [[ACQUIRE_SEQCST]]
  // CHECK: cmpxchg {{.*}} acquire seq_cst, align 4
  // CHECK: br

  // CHECK: [[RELEASE_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} release monotonic, align 4
  // CHECK: br

  // CHECK: [[RELEASE_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} release acquire, align 4
  // CHECK: br

  // CHECK: [[RELEASE_SEQCST]]
  // CHECK: cmpxchg {{.*}} release seq_cst, align 4
  // CHECK: br

  // CHECK: [[ACQREL_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} acq_rel monotonic, align 4
  // CHECK: br

  // CHECK: [[ACQREL_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} acq_rel acquire, align 4
  // CHECK: br

  // CHECK: [[ACQREL_SEQCST]]
  // CHECK: cmpxchg {{.*}} acq_rel seq_cst, align 4
  // CHECK: br

  // CHECK: [[SEQCST_MONOTONIC]]
  // CHECK: cmpxchg {{.*}} seq_cst monotonic, align 4
  // CHECK: br

  // CHECK: [[SEQCST_ACQUIRE]]
  // CHECK: cmpxchg {{.*}} seq_cst acquire, align 4
  // CHECK: br

  // CHECK: [[SEQCST_SEQCST]]
  // CHECK: cmpxchg {{.*}} seq_cst seq_cst, align 4
  // CHECK: br
}

// Atomic load through a pointer to volatile: the volatile qualifier must be
// carried onto the IR instruction (`load atomic volatile`). The directives pin
// the complete instruction sequence of the function, from the allocas to the
// return.
int test_volatile(volatile atomic_int *i) {
  // CHECK-LABEL: @test_volatile
  // CHECK: %[[i_addr:.*]] = alloca i32
  // CHECK-NEXT: %[[atomicdst:.*]] = alloca i32
  // CHECK-NEXT: store i32* %i, i32* addrspace(5)* %[[i_addr]]
  // CHECK-NEXT: %[[addr:.*]] = load i32*, i32* addrspace(5)* %[[i_addr]]
  // CHECK-NEXT: %[[res:.*]] = load atomic volatile i32, i32* %[[addr]] syncscope("workgroup") seq_cst, align 4
  // CHECK-NEXT: store i32 %[[res]], i32 addrspace(5)* %[[atomicdst]]
  // CHECK-NEXT: %[[retval:.*]] = load i32, i32 addrspace(5)* %[[atomicdst]]
  // CHECK-NEXT: ret i32 %[[retval]]
  return __opencl_atomic_load(i, memory_order_seq_cst, memory_scope_work_group);
}

#endif