// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

// AtomicPointer provides storage for a lock-free pointer.
// Platform-dependent implementation of AtomicPointer:
// - If the platform provides a cheap barrier, we use it with raw pointers
// - If <atomic> is present (on newer versions of gcc, it is), we use
//   a <atomic>-based AtomicPointer.  However we prefer the memory
//   barrier based version, because at least on a gcc 4.4 32-bit build
//   on linux, we have encountered a buggy <atomic> implementation.
//   Also, some <atomic> implementations are much slower than a memory-barrier
//   based implementation (~16ns for <atomic> based acquire-load vs. ~1ns for
//   a barrier based acquire-load).
// This code is based on atomicops-internals-* in Google's perftools:
// http://code.google.com/p/google-perftools/source/browse/#svn%2Ftrunk%2Fsrc%2Fbase

#ifndef PORT_ATOMIC_POINTER_H_
#define PORT_ATOMIC_POINTER_H_

#include <stdint.h>
#ifdef LEVELDB_ATOMIC_PRESENT
#include <atomic>
#endif
#ifdef OS_WIN
#include <windows.h>
#endif
#ifdef OS_MACOSX
#include <libkern/OSAtomic.h>
#endif

// Detect the CPU family so we can pick the cheapest barrier below.
#if defined(_M_X64) || defined(__x86_64__)
#define ARCH_CPU_X86_FAMILY 1
#elif defined(_M_IX86) || defined(__i386__) || defined(__i386)
#define ARCH_CPU_X86_FAMILY 1
#elif defined(__ARMEL__)
#define ARCH_CPU_ARM_FAMILY 1
#elif defined(__aarch64__)
#define ARCH_CPU_ARM64_FAMILY 1
#elif defined(__ppc__) || defined(__powerpc__) || defined(__powerpc64__)
#define ARCH_CPU_PPC_FAMILY 1
#elif defined(__mips__)
#define ARCH_CPU_MIPS_FAMILY 1
#endif

namespace leveldb {
namespace port {

// Define MemoryBarrier() if available
// Windows on x86
#if defined(OS_WIN) && defined(COMPILER_MSVC) && defined(ARCH_CPU_X86_FAMILY)
// windows.h already provides a MemoryBarrier(void) macro
// http://msdn.microsoft.com/en-us/library/ms684208(v=vs.85).aspx
#define LEVELDB_HAVE_MEMORY_BARRIER

// Mac OS
#elif defined(OS_MACOSX)
inline void MemoryBarrier() {
  OSMemoryBarrier();
}
#define LEVELDB_HAVE_MEMORY_BARRIER

// Gcc on x86
#elif defined(ARCH_CPU_X86_FAMILY) && defined(__GNUC__)
inline void MemoryBarrier() {
  // See http://gcc.gnu.org/ml/gcc/2003-04/msg01180.html for a discussion on
  // this idiom. Also see http://en.wikipedia.org/wiki/Memory_ordering.
  __asm__ __volatile__("" : : : "memory");
}
#define LEVELDB_HAVE_MEMORY_BARRIER

// Sun Studio
#elif defined(ARCH_CPU_X86_FAMILY) && defined(__SUNPRO_CC)
inline void MemoryBarrier() {
  // See http://gcc.gnu.org/ml/gcc/2003-04/msg01180.html for a discussion on
  // this idiom. Also see http://en.wikipedia.org/wiki/Memory_ordering.
  asm volatile("" : : : "memory");
}
#define LEVELDB_HAVE_MEMORY_BARRIER

// ARM Linux
#elif defined(ARCH_CPU_ARM_FAMILY) && defined(__linux__)
typedef void (*LinuxKernelMemoryBarrierFunc)(void);
// The Linux ARM kernel provides a highly optimized device-specific memory
// barrier function at a fixed memory address that is mapped in every
// user-level process.
//
// This beats using CPU-specific instructions which are, on single-core
// devices, un-necessary and very costly (e.g. ARMv7-A "dmb" takes more
// than 180ns on a Cortex-A8 like the one on a Nexus One). Benchmarking
// shows that the extra function call cost is completely negligible on
// multi-core devices.
//
inline void MemoryBarrier() {
  (*(LinuxKernelMemoryBarrierFunc)0xffff0fa0)();
}
#define LEVELDB_HAVE_MEMORY_BARRIER

// ARM64
#elif defined(ARCH_CPU_ARM64_FAMILY)
inline void MemoryBarrier() {
  asm volatile("dmb sy" : : : "memory");
}
#define LEVELDB_HAVE_MEMORY_BARRIER

// PPC
#elif defined(ARCH_CPU_PPC_FAMILY) && defined(__GNUC__)
inline void MemoryBarrier() {
  // TODO for some powerpc expert: is there a cheaper suitable variant?
  // Perhaps by having separate barriers for acquire and release ops.
  asm volatile("sync" : : : "memory");
}
#define LEVELDB_HAVE_MEMORY_BARRIER

// MIPS
#elif defined(ARCH_CPU_MIPS_FAMILY) && defined(__GNUC__)
inline void MemoryBarrier() {
  __asm__ __volatile__("sync" : : : "memory");
}
#define LEVELDB_HAVE_MEMORY_BARRIER

#endif

// AtomicPointer built using platform-specific MemoryBarrier().
// NoBarrier_* give plain (unsynchronized) access; Acquire_Load /
// Release_Store pair a full MemoryBarrier() with the raw access.
#if defined(LEVELDB_HAVE_MEMORY_BARRIER)
class AtomicPointer {
 private:
  void* rep_;
 public:
  AtomicPointer() { }
  explicit AtomicPointer(void* p) : rep_(p) {}
  inline void* NoBarrier_Load() const { return rep_; }
  inline void NoBarrier_Store(void* v) { rep_ = v; }
  inline void* Acquire_Load() const {
    void* result = rep_;
    MemoryBarrier();
    return result;
  }
  inline void Release_Store(void* v) {
    MemoryBarrier();
    rep_ = v;
  }
};

// AtomicPointer based on <atomic>
#elif defined(LEVELDB_ATOMIC_PRESENT)
class AtomicPointer {
 private:
  std::atomic<void*> rep_;
 public:
  AtomicPointer() { }
  explicit AtomicPointer(void* v) : rep_(v) { }
  inline void* Acquire_Load() const {
    return rep_.load(std::memory_order_acquire);
  }
  inline void Release_Store(void* v) {
    rep_.store(v, std::memory_order_release);
  }
  inline void* NoBarrier_Load() const {
    return rep_.load(std::memory_order_relaxed);
  }
  inline void NoBarrier_Store(void* v) {
    rep_.store(v, std::memory_order_relaxed);
  }
};

// Atomic pointer based on sparc memory barriers
#elif defined(__sparcv9) && defined(__GNUC__)
class AtomicPointer {
 private:
  void* rep_;
 public:
  AtomicPointer() { }
  explicit AtomicPointer(void* v) : rep_(v) { }
  inline void* Acquire_Load() const {
    void* val;
    __asm__ __volatile__ (
        "ldx [%[rep_]], %[val] \n\t"
        "membar #LoadLoad|#LoadStore \n\t"
        : [val] "=r" (val)
        : [rep_] "r" (&rep_)
        : "memory");
    return val;
  }
  inline void Release_Store(void* v) {
    __asm__ __volatile__ (
        "membar #LoadStore|#StoreStore \n\t"
        "stx %[v], [%[rep_]] \n\t"
        :
        : [rep_] "r" (&rep_), [v] "r" (v)
        : "memory");
  }
  inline void* NoBarrier_Load() const { return rep_; }
  inline void NoBarrier_Store(void* v) { rep_ = v; }
};

// Atomic pointer based on ia64 acq/rel
#elif defined(__ia64) && defined(__GNUC__)
class AtomicPointer {
 private:
  void* rep_;
 public:
  AtomicPointer() { }
  explicit AtomicPointer(void* v) : rep_(v) { }
  inline void* Acquire_Load() const {
    void* val    ;
    __asm__ __volatile__ (
        "ld8.acq %[val] = [%[rep_]] \n\t"
        : [val] "=r" (val)
        : [rep_] "r" (&rep_)
        : "memory"
        );
    return val;
  }
  inline void Release_Store(void* v) {
    __asm__ __volatile__ (
        "st8.rel [%[rep_]] = %[v] \n\t"
        :
        : [rep_] "r" (&rep_), [v] "r" (v)
        : "memory"
        );
  }
  inline void* NoBarrier_Load() const { return rep_; }
  inline void NoBarrier_Store(void* v) { rep_ = v; }
};

// We have neither MemoryBarrier(), nor <atomic>
#else
#error Please implement AtomicPointer for this platform.

#endif

// Keep the helper macros file-local: undef everything defined above so
// nothing leaks into translation units that include this header.
#undef LEVELDB_HAVE_MEMORY_BARRIER
#undef ARCH_CPU_X86_FAMILY
#undef ARCH_CPU_ARM_FAMILY
#undef ARCH_CPU_ARM64_FAMILY
#undef ARCH_CPU_PPC_FAMILY
#undef ARCH_CPU_MIPS_FAMILY

}  // namespace port
}  // namespace leveldb

#endif  // PORT_ATOMIC_POINTER_H_