//  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
//  This source code is licensed under both the GPLv2 (found in the
//  COPYING file in the root directory) and Apache 2.0 License
//  (found in the LICENSE.Apache file in the root directory).
5 
6 #pragma once
7 #ifndef ROCKSDB_LITE
8 
9 #include <chrono>
10 #include <string>
11 #include <unordered_map>
12 #include <memory>
13 #include <utility>
14 #include <vector>
15 
16 #include "monitoring/instrumented_mutex.h"
17 #include "rocksdb/utilities/transaction.h"
18 #include "util/autovector.h"
19 #include "util/hash_map.h"
20 #include "util/thread_local.h"
21 #include "utilities/transactions/pessimistic_transaction.h"
22 
23 namespace ROCKSDB_NAMESPACE {
24 
// Forward declarations.  This header only refers to these types through
// pointers, references, or std::shared_ptr, so their full definitions are
// not required here.
class ColumnFamilyHandle;
struct LockInfo;
struct LockMap;
struct LockMapStripe;
29 
30 struct DeadlockInfoBuffer {
31  private:
32   std::vector<DeadlockPath> paths_buffer_;
33   uint32_t buffer_idx_;
34   std::mutex paths_buffer_mutex_;
35   std::vector<DeadlockPath> Normalize();
36 
37  public:
DeadlockInfoBufferDeadlockInfoBuffer38   explicit DeadlockInfoBuffer(uint32_t n_latest_dlocks)
39       : paths_buffer_(n_latest_dlocks), buffer_idx_(0) {}
40   void AddNewPath(DeadlockPath path);
41   void Resize(uint32_t target_size);
42   std::vector<DeadlockPath> PrepareBuffer();
43 };
44 
45 struct TrackedTrxInfo {
46   autovector<TransactionID> m_neighbors;
47   uint32_t m_cf_id;
48   bool m_exclusive;
49   std::string m_waiting_key;
50 };
51 
// Forward declarations; only pointers to these types are needed in this
// header.
class Slice;
class PessimisticTransactionDB;
54 
55 class TransactionLockMgr {
56  public:
57   TransactionLockMgr(TransactionDB* txn_db, size_t default_num_stripes,
58                      int64_t max_num_locks, uint32_t max_num_deadlocks,
59                      std::shared_ptr<TransactionDBMutexFactory> factory);
60   // No copying allowed
61   TransactionLockMgr(const TransactionLockMgr&) = delete;
62   void operator=(const TransactionLockMgr&) = delete;
63 
64   ~TransactionLockMgr();
65 
66   // Creates a new LockMap for this column family.  Caller should guarantee
67   // that this column family does not already exist.
68   void AddColumnFamily(uint32_t column_family_id);
69 
70   // Deletes the LockMap for this column family.  Caller should guarantee that
71   // this column family is no longer in use.
72   void RemoveColumnFamily(uint32_t column_family_id);
73 
74   // Attempt to lock key.  If OK status is returned, the caller is responsible
75   // for calling UnLock() on this key.
76   Status TryLock(PessimisticTransaction* txn, uint32_t column_family_id,
77                  const std::string& key, Env* env, bool exclusive);
78 
79   // Unlock a key locked by TryLock().  txn must be the same Transaction that
80   // locked this key.
81   void UnLock(const PessimisticTransaction* txn, const TransactionKeyMap* keys,
82               Env* env);
83   void UnLock(PessimisticTransaction* txn, uint32_t column_family_id,
84               const std::string& key, Env* env);
85 
86   using LockStatusData = std::unordered_multimap<uint32_t, KeyLockInfo>;
87   LockStatusData GetLockStatusData();
88   std::vector<DeadlockPath> GetDeadlockInfoBuffer();
89   void Resize(uint32_t);
90 
91  private:
92   PessimisticTransactionDB* txn_db_impl_;
93 
94   // Default number of lock map stripes per column family
95   const size_t default_num_stripes_;
96 
97   // Limit on number of keys locked per column family
98   const int64_t max_num_locks_;
99 
100   // The following lock order must be satisfied in order to avoid deadlocking
101   // ourselves.
102   //   - lock_map_mutex_
103   //   - stripe mutexes in ascending cf id, ascending stripe order
104   //   - wait_txn_map_mutex_
105   //
106   // Must be held when accessing/modifying lock_maps_.
107   InstrumentedMutex lock_map_mutex_;
108 
109   // Map of ColumnFamilyId to locked key info
110   using LockMaps = std::unordered_map<uint32_t, std::shared_ptr<LockMap>>;
111   LockMaps lock_maps_;
112 
113   // Thread-local cache of entries in lock_maps_.  This is an optimization
114   // to avoid acquiring a mutex in order to look up a LockMap
115   std::unique_ptr<ThreadLocalPtr> lock_maps_cache_;
116 
117   // Must be held when modifying wait_txn_map_ and rev_wait_txn_map_.
118   std::mutex wait_txn_map_mutex_;
119 
120   // Maps from waitee -> number of waiters.
121   HashMap<TransactionID, int> rev_wait_txn_map_;
122   // Maps from waiter -> waitee.
123   HashMap<TransactionID, TrackedTrxInfo> wait_txn_map_;
124   DeadlockInfoBuffer dlock_buffer_;
125 
126   // Used to allocate mutexes/condvars to use when locking keys
127   std::shared_ptr<TransactionDBMutexFactory> mutex_factory_;
128 
129   bool IsLockExpired(TransactionID txn_id, const LockInfo& lock_info, Env* env,
130                      uint64_t* wait_time);
131 
132   std::shared_ptr<LockMap> GetLockMap(uint32_t column_family_id);
133 
134   Status AcquireWithTimeout(PessimisticTransaction* txn, LockMap* lock_map,
135                             LockMapStripe* stripe, uint32_t column_family_id,
136                             const std::string& key, Env* env, int64_t timeout,
137                             LockInfo&& lock_info);
138 
139   Status AcquireLocked(LockMap* lock_map, LockMapStripe* stripe,
140                        const std::string& key, Env* env,
141                        LockInfo&& lock_info, uint64_t* wait_time,
142                        autovector<TransactionID>* txn_ids);
143 
144   void UnLockKey(const PessimisticTransaction* txn, const std::string& key,
145                  LockMapStripe* stripe, LockMap* lock_map, Env* env);
146 
147   bool IncrementWaiters(const PessimisticTransaction* txn,
148                         const autovector<TransactionID>& wait_ids,
149                         const std::string& key, const uint32_t& cf_id,
150                         const bool& exclusive, Env* const env);
151   void DecrementWaiters(const PessimisticTransaction* txn,
152                         const autovector<TransactionID>& wait_ids);
153   void DecrementWaitersImpl(const PessimisticTransaction* txn,
154                             const autovector<TransactionID>& wait_ids);
155 };
156 
157 }  // namespace ROCKSDB_NAMESPACE
158 #endif  // ROCKSDB_LITE
159