//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
//
// Copyright (c) 2011 The LevelDB Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file. See the AUTHORS file for names of contributors.

#include "util/thread_local.h"
#include "util/mutexlock.h"
#include "port/likely.h"
#include <stdlib.h>

namespace ROCKSDB_NAMESPACE {

struct Entry {
  Entry() : ptr(nullptr) {}
  Entry(const Entry& e) : ptr(e.ptr.load(std::memory_order_relaxed)) {}
  std::atomic<void*> ptr;
};

class StaticMeta;

// This is the structure that is declared as "thread_local" storage.
// The vector keeps a list of atomic pointers, one per ThreadLocalPtr
// instance, for the current thread. The vector is indexed by an Id that is
// unique within the process and associated with one ThreadLocalPtr
// instance. The Id is assigned by a global StaticMeta singleton. So if we
// instantiate 3 ThreadLocalPtr instances, each thread will have a
// ThreadData with a vector of size 3:
//     ---------------------------------------------------
//     |          | instance 1 | instance 2 | instance 3 |
//     ---------------------------------------------------
//     | thread 1 |    void*   |    void*   |    void*   | <- ThreadData
//     ---------------------------------------------------
//     | thread 2 |    void*   |    void*   |    void*   | <- ThreadData
//     ---------------------------------------------------
//     | thread 3 |    void*   |    void*   |    void*   | <- ThreadData
//     ---------------------------------------------------
struct ThreadData {
  explicit ThreadData(ThreadLocalPtr::StaticMeta* _inst)
    : entries(),
      next(nullptr),
      prev(nullptr),
      inst(_inst) {}
  std::vector<Entry> entries;
  ThreadData* next;
  ThreadData* prev;
  ThreadLocalPtr::StaticMeta* inst;
};
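
// A minimal usage sketch of the mapping drawn above (illustrative only;
// "Widget" and "UnrefWidget" are hypothetical): each live ThreadLocalPtr
// claims one column, and each thread lazily grows its own row.
//
//   static void UnrefWidget(void* ptr) { delete static_cast<Widget*>(ptr); }
//
//   ThreadLocalPtr widget_tls(&UnrefWidget);  // claims one Id (one column)
//   widget_tls.Reset(new Widget());           // fills this thread's cell
//   auto* w = static_cast<Widget*>(widget_tls.Get());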

class ThreadLocalPtr::StaticMeta {
 public:
  StaticMeta();

  // Return the next available Id
  uint32_t GetId();
  // Return the next available Id without claiming it
  uint32_t PeekId() const;
  // Return the given Id back to the free pool. This also triggers
  // UnrefHandler for the associated pointer value (if not NULL) for all
  // threads.
  void ReclaimId(uint32_t id);

  // Return the pointer value for the given id for the current thread.
  void* Get(uint32_t id) const;
  // Reset the pointer value for the given id for the current thread.
  void Reset(uint32_t id, void* ptr);
  // Atomically swap in the supplied ptr and return the previous value
  void* Swap(uint32_t id, void* ptr);
  // Atomically store the provided value only if the current value equals
  // the expected value.
  bool CompareAndSwap(uint32_t id, void* ptr, void*& expected);
  // Reset all thread-local data to replacement, and return the
  // non-nullptr data for all existing threads
  void Scrape(uint32_t id, autovector<void*>* ptrs, void* const replacement);
  // Update res by applying func to each thread-local value. Holds a lock
  // that prevents the unref handler from running during this call, but
  // clients must still provide external synchronization since the owning
  // thread can access the values without internal locking, e.g., via
  // Get() and Reset().
  void Fold(uint32_t id, FoldFunc func, void* res);

  // Register the UnrefHandler for id
  void SetHandler(uint32_t id, UnrefHandler handler);

  // protects inst, next_instance_id_, free_instance_ids_, head_,
  // ThreadData.entries
  //
  // Note that here we prefer a function-local static variable over the
  // usual global static variable.  The reason is that C++ destroys static
  // variables in the reverse order of their construction, but guarantees
  // no particular construction order when global static variables are
  // defined in different files.  Function-local static variables, by
  // contrast, are initialized when their enclosing function is first
  // called, so their construction order can be controlled by invoking
  // those first calls in the right order.
  //
  // For instance, the following function contains such a function-local
  // static variable.  We place a dummy call to it inside Env::Default()
  // to pin down its construction order.
  static port::Mutex* Mutex();
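
  // A sketch of that dummy call (for illustration; RocksDB's Env::Default()
  // calls ThreadLocalPtr::InitSingletons(), defined later in this file,
  // which in turn constructs the StaticMeta singleton and this mutex):
  //
  //   Env* Env::Default() {
  //     ThreadLocalPtr::InitSingletons();  // construct StaticMeta now, so
  //                                        // it is destroyed after Env's
  //                                        // own static state
  //     ...                                // actual body varies
  //   }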

  // Returns the member mutex of the current StaticMeta.  In general,
  // Mutex() should be used instead of this one.  However, in cases where
  // the static variable inside Instance() has gone out of scope,
  // MemberMutex() should be used.  One example is the OnThreadExit()
  // function.
  port::Mutex* MemberMutex() { return &mutex_; }

 private:
  // Get the UnrefHandler for id
  // REQUIRES: mutex locked
  UnrefHandler GetHandler(uint32_t id);

  // Triggered before a thread terminates
  static void OnThreadExit(void* ptr);

  // Add the current thread's ThreadData to the global chain
  // REQUIRES: mutex locked
  void AddThreadData(ThreadData* d);

  // Remove the current thread's ThreadData from the global chain
  // REQUIRES: mutex locked
  void RemoveThreadData(ThreadData* d);

  static ThreadData* GetThreadLocal();

  uint32_t next_instance_id_;
  // Used to recycle Ids in case ThreadLocalPtr is instantiated and destroyed
  // frequently. This also prevents the entries vector from growing without
  // bound.
  autovector<uint32_t> free_instance_ids_;
  // Chain all thread-local structures together. This is necessary since,
  // when one ThreadLocalPtr gets destroyed, we need to loop over each
  // thread's version of the pointer corresponding to that instance and
  // call UnrefHandler for it.
  ThreadData head_;

  std::unordered_map<uint32_t, UnrefHandler> handler_map_;

  // The private mutex.  Developers should always use Mutex() instead of
  // using this variable directly.
  port::Mutex mutex_;
#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
  // Thread local storage
  static __thread ThreadData* tls_;
#endif

  // Used to make thread exit trigger possible if !defined(OS_MACOSX).
  // Otherwise, used to retrieve thread data.
  pthread_key_t pthread_key_;
};

#ifdef ROCKSDB_SUPPORT_THREAD_LOCAL
__thread ThreadData* ThreadLocalPtr::StaticMeta::tls_ = nullptr;
#endif

// Windows doesn't support a per-thread destructor with its
// TLS primitives.  So, we build it manually by inserting a
// function to be called on each thread's exit.
// See http://www.codeproject.com/Articles/8113/Thread-Local-Storage-The-C-Way
// and http://www.nynaeve.net/?p=183
//
// Really we do this to have a clear conscience, since using TLS with
// thread pools is iffy: it is OK within a single request, but otherwise
// threads have no identity in their modern use.

// This runs on Windows only, called from the System Loader.
#ifdef OS_WIN

// The Windows cleanup routine is invoked by the System Loader with a
// different signature, so we cannot directly hook up the original
// OnThreadExit, which is a private member.  Instead, the StaticMeta class
// shares the address of that function with us so we can invoke it.
namespace wintlscleanup {

// This is set to OnThreadExit in the StaticMeta singleton constructor
UnrefHandler thread_local_inclass_routine = nullptr;
pthread_key_t thread_local_key = pthread_key_t(-1);

// Static callback function to call with each thread termination.
void NTAPI WinOnThreadExit(PVOID module, DWORD reason, PVOID reserved) {
  // We decided to punt on PROCESS_EXIT
  if (DLL_THREAD_DETACH == reason) {
    if (thread_local_key != pthread_key_t(-1) &&
        thread_local_inclass_routine != nullptr) {
      void* tls = TlsGetValue(thread_local_key);
      if (tls != nullptr) {
        thread_local_inclass_routine(tls);
      }
    }
  }
}

}  // namespace wintlscleanup

// extern "C" suppresses C++ name mangling so we know the symbol name for the
// linker /INCLUDE:symbol pragmas below.
extern "C" {

#ifdef _MSC_VER
// The linker must not discard thread_callback_on_exit.  (We force a reference
// to this variable with a linker /include:symbol pragma to ensure that.) If
// this variable is discarded, the OnThreadExit function will never be called.
#ifndef _X86_

// .CRT section is merged with .rdata on x64 so it must be constant data.
#pragma const_seg(".CRT$XLB")
// When defining a const variable, it must have external linkage to be sure the
// linker doesn't discard it.
extern const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit;
const PIMAGE_TLS_CALLBACK p_thread_callback_on_exit =
    wintlscleanup::WinOnThreadExit;
// Reset the default section.
#pragma const_seg()

#pragma comment(linker, "/include:_tls_used")
#pragma comment(linker, "/include:p_thread_callback_on_exit")

#else  // _X86_

#pragma data_seg(".CRT$XLB")
PIMAGE_TLS_CALLBACK p_thread_callback_on_exit = wintlscleanup::WinOnThreadExit;
// Reset the default section.
#pragma data_seg()

#pragma comment(linker, "/INCLUDE:__tls_used")
#pragma comment(linker, "/INCLUDE:_p_thread_callback_on_exit")

#endif  // _X86_

#else
// https://github.com/couchbase/gperftools/blob/master/src/windows/port.cc
BOOL WINAPI DllMain(HINSTANCE h, DWORD dwReason, PVOID pv) {
  if (dwReason == DLL_THREAD_DETACH)
    wintlscleanup::WinOnThreadExit(h, dwReason, pv);
  return TRUE;
}
#endif
}  // extern "C"

#endif  // OS_WIN

void ThreadLocalPtr::InitSingletons() { ThreadLocalPtr::Instance(); }

ThreadLocalPtr::StaticMeta* ThreadLocalPtr::Instance() {
  // Here we prefer a function-local static variable over a global static
  // variable, as a function-local static is initialized when the function
  // is first called.  As a result, we can control the construction order
  // of the statics by properly arranging their first function calls.
  //
  // Note that we deliberately make "inst" a static pointer that is never
  // deleted, rather than a static object.  This avoids the following
  // destruction-order disaster when a child thread using ThreadLocalPtr
  // dies AFTER the main thread: when such a child thread dies, it tries
  // to delete its thread-local data in OnThreadExit, and OnThreadExit
  // depends on this variable.  If the main thread died first, a static
  // object defined here would already have been destructed, and
  // OnThreadExit would then make an invalid access.
  //
  // The above problem could be solved by using thread_local to store tls_
  // instead of __thread.  The major difference is that thread_local
  // supports dynamic construction and destruction of non-primitive typed
  // variables, so we could guarantee the destruction order even when the
  // main thread dies before any child thread.  However, thread_local is
  // not supported by all compilers that accept -std=c++11 (e.g., Mac with
  // XCode < 8; XCode 8+ supports thread_local).
  static ThreadLocalPtr::StaticMeta* inst = new ThreadLocalPtr::StaticMeta();
  return inst;
}
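
// A sketch of the thread_local alternative described above (illustrative;
// "ThreadDataHolder" is hypothetical and not what this file uses): wrap the
// pointer in a type whose destructor performs the per-thread cleanup, so the
// language guarantees the cleanup runs at thread exit in a defined order.
//
//   struct ThreadDataHolder {
//     ThreadData* data = nullptr;
//     ~ThreadDataHolder() {
//       if (data != nullptr) {
//         // perform the same cleanup OnThreadExit does today
//       }
//     }
//   };
//   static thread_local ThreadDataHolder tls_holder;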

port::Mutex* ThreadLocalPtr::StaticMeta::Mutex() { return &Instance()->mutex_; }

void ThreadLocalPtr::StaticMeta::OnThreadExit(void* ptr) {
  auto* tls = static_cast<ThreadData*>(ptr);
  assert(tls != nullptr);

  // Use the cached StaticMeta pointer instead of calling Instance():
  // the static variable inside Instance() might already have gone out of
  // scope here, in case this OnThreadExit is called after the main thread
  // dies.
  auto* inst = tls->inst;
  pthread_setspecific(inst->pthread_key_, nullptr);

  MutexLock l(inst->MemberMutex());
  inst->RemoveThreadData(tls);
  // Unref the stored pointers of the current thread from all instances
  uint32_t id = 0;
  for (auto& e : tls->entries) {
    void* raw = e.ptr.load();
    if (raw != nullptr) {
      auto unref = inst->GetHandler(id);
      if (unref != nullptr) {
        unref(raw);
      }
    }
    ++id;
  }
  // Delete the thread-local structure regardless of platform (Mac or not)
  delete tls;
}

ThreadLocalPtr::StaticMeta::StaticMeta()
  : next_instance_id_(0),
    head_(this),
    pthread_key_(0) {
  if (pthread_key_create(&pthread_key_, &OnThreadExit) != 0) {
    abort();
  }

  // OnThreadExit does not get called for the main thread.
  // Invoke it through the static-destructor mechanism to avoid a memory
  // leak.
  //
  // Caveats: ~A() will be invoked _after_ ~StaticMeta for the global
  // singleton (destructors are invoked in reverse order of constructor
  // _completion_); the latter must not mutate internal members. This
  // cleanup mechanism inherently relies on use-after-release of the
  // StaticMeta, and is brittle with respect to compiler-specific handling
  // of memory backing destructed statically-scoped objects. Perhaps
  // registering with atexit(3) would be more robust.
  //
// This is not required on Windows.
#if !defined(OS_WIN)
  static struct A {
    ~A() {
#ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
      ThreadData* tls_ = static_cast<ThreadData*>(
          pthread_getspecific(Instance()->pthread_key_));
#endif
      if (tls_) {
        OnThreadExit(tls_);
      }
    }
  } a;
#endif  // !defined(OS_WIN)

  head_.next = &head_;
  head_.prev = &head_;

#ifdef OS_WIN
  // Share with Windows its cleanup routine and the key
  wintlscleanup::thread_local_inclass_routine = OnThreadExit;
  wintlscleanup::thread_local_key = pthread_key_;
#endif
}
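
// A sketch of the atexit(3)-based alternative mentioned in the caveat above
// (illustrative only; "MainThreadCleanup" is hypothetical and this file does
// not register it):
//
//   static void MainThreadCleanup() {
//     void* tls = pthread_getspecific(Instance()->pthread_key_);
//     if (tls != nullptr) {
//       OnThreadExit(tls);
//     }
//   }
//   // in StaticMeta::StaticMeta():  atexit(&MainThreadCleanup);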

void ThreadLocalPtr::StaticMeta::AddThreadData(ThreadData* d) {
  Mutex()->AssertHeld();
  d->next = &head_;
  d->prev = head_.prev;
  head_.prev->next = d;
  head_.prev = d;
}

void ThreadLocalPtr::StaticMeta::RemoveThreadData(ThreadData* d) {
  Mutex()->AssertHeld();
  d->next->prev = d->prev;
  d->prev->next = d->next;
  d->next = d->prev = d;
}

ThreadData* ThreadLocalPtr::StaticMeta::GetThreadLocal() {
#ifndef ROCKSDB_SUPPORT_THREAD_LOCAL
  // Make this local variable name look like a member variable so that we
  // can share all the code below
  ThreadData* tls_ =
      static_cast<ThreadData*>(pthread_getspecific(Instance()->pthread_key_));
#endif

  if (UNLIKELY(tls_ == nullptr)) {
    auto* inst = Instance();
    tls_ = new ThreadData(inst);
    {
      // Register it in the global chain; this needs to be done before the
      // thread-exit handler registration
      MutexLock l(Mutex());
      inst->AddThreadData(tls_);
    }
    // Even when not on OS_MACOSX, we need to register a value for
    // pthread_key_ so that its exit handler will be triggered.
    if (pthread_setspecific(inst->pthread_key_, tls_) != 0) {
      {
        MutexLock l(Mutex());
        inst->RemoveThreadData(tls_);
      }
      delete tls_;
      abort();
    }
  }
  return tls_;
}

void* ThreadLocalPtr::StaticMeta::Get(uint32_t id) const {
  auto* tls = GetThreadLocal();
  if (UNLIKELY(id >= tls->entries.size())) {
    return nullptr;
  }
  return tls->entries[id].ptr.load(std::memory_order_acquire);
}

void ThreadLocalPtr::StaticMeta::Reset(uint32_t id, void* ptr) {
  auto* tls = GetThreadLocal();
  if (UNLIKELY(id >= tls->entries.size())) {
    // Need mutex to protect entries access within ReclaimId
    MutexLock l(Mutex());
    tls->entries.resize(id + 1);
  }
  tls->entries[id].ptr.store(ptr, std::memory_order_release);
}

void* ThreadLocalPtr::StaticMeta::Swap(uint32_t id, void* ptr) {
  auto* tls = GetThreadLocal();
  if (UNLIKELY(id >= tls->entries.size())) {
    // Need mutex to protect entries access within ReclaimId
    MutexLock l(Mutex());
    tls->entries.resize(id + 1);
  }
  return tls->entries[id].ptr.exchange(ptr, std::memory_order_acquire);
}

bool ThreadLocalPtr::StaticMeta::CompareAndSwap(uint32_t id, void* ptr,
                                                void*& expected) {
  auto* tls = GetThreadLocal();
  if (UNLIKELY(id >= tls->entries.size())) {
    // Need mutex to protect entries access within ReclaimId
    MutexLock l(Mutex());
    tls->entries.resize(id + 1);
  }
  return tls->entries[id].ptr.compare_exchange_strong(
      expected, ptr, std::memory_order_release, std::memory_order_relaxed);
}
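
// An illustrative retry loop over CompareAndSwap (a sketch; "tls",
// "MakeValue", and the retry policy are hypothetical). On failure,
// `expected` is updated to whatever another party (e.g., Scrape) installed
// concurrently:
//
//   void* expected = tls.Get();
//   void* desired = MakeValue();
//   while (!tls.CompareAndSwap(desired, expected)) {
//     // `expected` now holds the concurrently installed value; decide
//     // whether to retry against it or give up
//   }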

void ThreadLocalPtr::StaticMeta::Scrape(uint32_t id, autovector<void*>* ptrs,
                                        void* const replacement) {
  MutexLock l(Mutex());
  for (ThreadData* t = head_.next; t != &head_; t = t->next) {
    if (id < t->entries.size()) {
      void* ptr =
          t->entries[id].ptr.exchange(replacement, std::memory_order_acquire);
      if (ptr != nullptr) {
        ptrs->push_back(ptr);
      }
    }
  }
}
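
// An illustrative use of Scrape (a sketch; "widget_tls" and "Widget" are
// hypothetical): detach every thread's stored value so the caller can
// dispose of them in one place.
//
//   autovector<void*> stale;
//   widget_tls.Scrape(&stale, nullptr);  // every thread's cell is now null
//   for (void* p : stale) {
//     delete static_cast<Widget*>(p);
//   }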

void ThreadLocalPtr::StaticMeta::Fold(uint32_t id, FoldFunc func, void* res) {
  MutexLock l(Mutex());
  for (ThreadData* t = head_.next; t != &head_; t = t->next) {
    if (id < t->entries.size()) {
      void* ptr = t->entries[id].ptr.load();
      if (ptr != nullptr) {
        func(ptr, res);
      }
    }
  }
}
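
// An illustrative aggregation with Fold (a sketch; "counter_tls" and
// "SumFunc" are hypothetical, and each thread is assumed to have stored a
// heap-allocated uint64_t counter via Reset()):
//
//   static void SumFunc(void* entry, void* res) {
//     *static_cast<uint64_t*>(res) += *static_cast<uint64_t*>(entry);
//   }
//
//   uint64_t total = 0;
//   counter_tls.Fold(SumFunc, &total);  // total = sum across live threads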

uint32_t ThreadLocalPtr::TEST_PeekId() {
  return Instance()->PeekId();
}

void ThreadLocalPtr::StaticMeta::SetHandler(uint32_t id, UnrefHandler handler) {
  MutexLock l(Mutex());
  handler_map_[id] = handler;
}

UnrefHandler ThreadLocalPtr::StaticMeta::GetHandler(uint32_t id) {
  Mutex()->AssertHeld();
  auto iter = handler_map_.find(id);
  if (iter == handler_map_.end()) {
    return nullptr;
  }
  return iter->second;
}

uint32_t ThreadLocalPtr::StaticMeta::GetId() {
  MutexLock l(Mutex());
  if (free_instance_ids_.empty()) {
    return next_instance_id_++;
  }

  uint32_t id = free_instance_ids_.back();
  free_instance_ids_.pop_back();
  return id;
}

uint32_t ThreadLocalPtr::StaticMeta::PeekId() const {
  MutexLock l(Mutex());
  if (!free_instance_ids_.empty()) {
    return free_instance_ids_.back();
  }
  return next_instance_id_;
}

void ThreadLocalPtr::StaticMeta::ReclaimId(uint32_t id) {
  // This id is no longer in use; go through all thread-local data and
  // release the corresponding values
  MutexLock l(Mutex());
  auto unref = GetHandler(id);
  for (ThreadData* t = head_.next; t != &head_; t = t->next) {
    if (id < t->entries.size()) {
      void* ptr = t->entries[id].ptr.exchange(nullptr);
      if (ptr != nullptr && unref != nullptr) {
        unref(ptr);
      }
    }
  }
  handler_map_[id] = nullptr;
  free_instance_ids_.push_back(id);
}

ThreadLocalPtr::ThreadLocalPtr(UnrefHandler handler)
    : id_(Instance()->GetId()) {
  if (handler != nullptr) {
    Instance()->SetHandler(id_, handler);
  }
}

ThreadLocalPtr::~ThreadLocalPtr() {
  Instance()->ReclaimId(id_);
}

void* ThreadLocalPtr::Get() const {
  return Instance()->Get(id_);
}

void ThreadLocalPtr::Reset(void* ptr) {
  Instance()->Reset(id_, ptr);
}

void* ThreadLocalPtr::Swap(void* ptr) {
  return Instance()->Swap(id_, ptr);
}

bool ThreadLocalPtr::CompareAndSwap(void* ptr, void*& expected) {
  return Instance()->CompareAndSwap(id_, ptr, expected);
}

void ThreadLocalPtr::Scrape(autovector<void*>* ptrs, void* const replacement) {
  Instance()->Scrape(id_, ptrs, replacement);
}

void ThreadLocalPtr::Fold(FoldFunc func, void* res) {
  Instance()->Fold(id_, func, res);
}

}  // namespace ROCKSDB_NAMESPACE