1 // Copyright (c) 2011-present, Facebook, Inc. All rights reserved. 2 // This source code is licensed under both the GPLv2 (found in the 3 // COPYING file in the root directory) and Apache 2.0 License 4 // (found in the LICENSE.Apache file in the root directory). 5 6 #pragma once 7 8 #include <stdint.h> 9 #include <map> 10 #include <string> 11 12 #include "rocksdb/perf_level.h" 13 14 namespace ROCKSDB_NAMESPACE { 15 16 // A thread local context for gathering performance counter efficiently 17 // and transparently. 18 // Use SetPerfLevel(PerfLevel::kEnableTime) to enable time stats. 19 20 // Break down performance counters by level and store per-level perf context in 21 // PerfContextByLevel 22 struct PerfContextByLevel { 23 // # of times bloom filter has avoided file reads, i.e., negatives. 24 uint64_t bloom_filter_useful = 0; 25 // # of times bloom FullFilter has not avoided the reads. 26 uint64_t bloom_filter_full_positive = 0; 27 // # of times bloom FullFilter has not avoided the reads and data actually 28 // exist. 29 uint64_t bloom_filter_full_true_positive = 0; 30 31 // total number of user key returned (only include keys that are found, does 32 // not include keys that are deleted or merged without a final put 33 uint64_t user_key_return_count; 34 35 // total nanos spent on reading data from SST files 36 uint64_t get_from_table_nanos; 37 38 uint64_t block_cache_hit_count = 0; // total number of block cache hits 39 uint64_t block_cache_miss_count = 0; // total number of block cache misses 40 41 void Reset(); // reset all performance counters to zero 42 }; 43 44 struct PerfContext { 45 ~PerfContext(); 46 PerfContextPerfContext47 PerfContext() {} 48 49 PerfContext(const PerfContext&); 50 PerfContext& operator=(const PerfContext&); 51 PerfContext(PerfContext&&) noexcept; 52 53 void Reset(); // reset all performance counters to zero 54 55 std::string ToString(bool exclude_zero_counters = false) const; 56 57 // enable per level perf context and allocate storage for PerfContextByLevel 58 void EnablePerLevelPerfContext(); 59 60 // temporarily disable per level perf contxt by setting the flag to false 61 void DisablePerLevelPerfContext(); 62 63 // free the space for PerfContextByLevel, also disable per level perf context 64 void ClearPerLevelPerfContext(); 65 66 uint64_t user_key_comparison_count; // total number of user key comparisons 67 uint64_t block_cache_hit_count; // total number of block cache hits 68 uint64_t block_read_count; // total number of block reads (with IO) 69 uint64_t block_read_byte; // total number of bytes from block reads 70 uint64_t block_read_time; // total nanos spent on block reads 71 uint64_t block_cache_index_hit_count; // total number of index block hits 72 uint64_t index_block_read_count; // total number of index block reads 73 uint64_t block_cache_filter_hit_count; // total number of filter block hits 74 uint64_t filter_block_read_count; // total number of filter block reads 75 uint64_t compression_dict_block_read_count; // total number of compression 76 // dictionary block reads 77 uint64_t block_checksum_time; // total nanos spent on block checksum 78 uint64_t block_decompress_time; // total nanos spent on block decompression 79 80 uint64_t get_read_bytes; // bytes for vals returned by Get 81 uint64_t multiget_read_bytes; // bytes for vals returned by MultiGet 82 uint64_t iter_read_bytes; // bytes for keys/vals decoded by iterator 83 84 // total number of internal keys skipped over during iteration. 85 // There are several reasons for it: 86 // 1. when calling Next(), the iterator is in the position of the previous 87 // key, so that we'll need to skip it. It means this counter will always 88 // be incremented in Next(). 89 // 2. when calling Next(), we need to skip internal entries for the previous 90 // keys that are overwritten. 91 // 3. when calling Next(), Seek() or SeekToFirst(), after previous key 92 // before calling Next(), the seek key in Seek() or the beginning for 93 // SeekToFirst(), there may be one or more deleted keys before the next 94 // valid key that the operation should place the iterator to. We need 95 // to skip both of the tombstone and updates hidden by the tombstones. The 96 // tombstones are not included in this counter, while previous updates 97 // hidden by the tombstones will be included here. 98 // 4. symmetric cases for Prev() and SeekToLast() 99 // internal_recent_skipped_count is not included in this counter. 100 // 101 uint64_t internal_key_skipped_count; 102 // Total number of deletes and single deletes skipped over during iteration 103 // When calling Next(), Seek() or SeekToFirst(), after previous position 104 // before calling Next(), the seek key in Seek() or the beginning for 105 // SeekToFirst(), there may be one or more deleted keys before the next valid 106 // key. Every deleted key is counted once. We don't recount here if there are 107 // still older updates invalidated by the tombstones. 108 // 109 uint64_t internal_delete_skipped_count; 110 // How many times iterators skipped over internal keys that are more recent 111 // than the snapshot that iterator is using. 112 // 113 uint64_t internal_recent_skipped_count; 114 // How many values were fed into merge operator by iterators. 115 // 116 uint64_t internal_merge_count; 117 118 uint64_t get_snapshot_time; // total nanos spent on getting snapshot 119 uint64_t get_from_memtable_time; // total nanos spent on querying memtables 120 uint64_t get_from_memtable_count; // number of mem tables queried 121 // total nanos spent after Get() finds a key 122 uint64_t get_post_process_time; 123 uint64_t get_from_output_files_time; // total nanos reading from output files 124 // total nanos spent on seeking memtable 125 uint64_t seek_on_memtable_time; 126 // number of seeks issued on memtable 127 // (including SeekForPrev but not SeekToFirst and SeekToLast) 128 uint64_t seek_on_memtable_count; 129 // number of Next()s issued on memtable 130 uint64_t next_on_memtable_count; 131 // number of Prev()s issued on memtable 132 uint64_t prev_on_memtable_count; 133 // total nanos spent on seeking child iters 134 uint64_t seek_child_seek_time; 135 // number of seek issued in child iterators 136 uint64_t seek_child_seek_count; 137 uint64_t seek_min_heap_time; // total nanos spent on the merge min heap 138 uint64_t seek_max_heap_time; // total nanos spent on the merge max heap 139 // total nanos spent on seeking the internal entries 140 uint64_t seek_internal_seek_time; 141 // total nanos spent on iterating internal entries to find the next user entry 142 uint64_t find_next_user_entry_time; 143 144 // This group of stats provide a breakdown of time spent by Write(). 145 // May be inaccurate when 2PC, two_write_queues or enable_pipelined_write 146 // are enabled. 147 // 148 // total nanos spent on writing to WAL 149 uint64_t write_wal_time; 150 // total nanos spent on writing to mem tables 151 uint64_t write_memtable_time; 152 // total nanos spent on delaying or throttling write 153 uint64_t write_delay_time; 154 // total nanos spent on switching memtable/wal and scheduling 155 // flushes/compactions. 156 uint64_t write_scheduling_flushes_compactions_time; 157 // total nanos spent on writing a record, excluding the above four things 158 uint64_t write_pre_and_post_process_time; 159 160 // time spent waiting for other threads of the batch group 161 uint64_t write_thread_wait_nanos; 162 163 // time spent on acquiring DB mutex. 164 uint64_t db_mutex_lock_nanos; 165 // Time spent on waiting with a condition variable created with DB mutex. 166 uint64_t db_condition_wait_nanos; 167 // Time spent on merge operator. 168 uint64_t merge_operator_time_nanos; 169 170 // Time spent on reading index block from block cache or SST file 171 uint64_t read_index_block_nanos; 172 // Time spent on reading filter block from block cache or SST file 173 uint64_t read_filter_block_nanos; 174 // Time spent on creating data block iterator 175 uint64_t new_table_block_iter_nanos; 176 // Time spent on creating a iterator of an SST file. 177 uint64_t new_table_iterator_nanos; 178 // Time spent on seeking a key in data/index blocks 179 uint64_t block_seek_nanos; 180 // Time spent on finding or creating a table reader 181 uint64_t find_table_nanos; 182 // total number of mem table bloom hits 183 uint64_t bloom_memtable_hit_count; 184 // total number of mem table bloom misses 185 uint64_t bloom_memtable_miss_count; 186 // total number of SST table bloom hits 187 uint64_t bloom_sst_hit_count; 188 // total number of SST table bloom misses 189 uint64_t bloom_sst_miss_count; 190 191 // Time spent waiting on key locks in transaction lock manager. 192 uint64_t key_lock_wait_time; 193 // number of times acquiring a lock was blocked by another transaction. 194 uint64_t key_lock_wait_count; 195 196 // Total time spent in Env filesystem operations. These are only populated 197 // when TimedEnv is used. 198 uint64_t env_new_sequential_file_nanos; 199 uint64_t env_new_random_access_file_nanos; 200 uint64_t env_new_writable_file_nanos; 201 uint64_t env_reuse_writable_file_nanos; 202 uint64_t env_new_random_rw_file_nanos; 203 uint64_t env_new_directory_nanos; 204 uint64_t env_file_exists_nanos; 205 uint64_t env_get_children_nanos; 206 uint64_t env_get_children_file_attributes_nanos; 207 uint64_t env_delete_file_nanos; 208 uint64_t env_create_dir_nanos; 209 uint64_t env_create_dir_if_missing_nanos; 210 uint64_t env_delete_dir_nanos; 211 uint64_t env_get_file_size_nanos; 212 uint64_t env_get_file_modification_time_nanos; 213 uint64_t env_rename_file_nanos; 214 uint64_t env_link_file_nanos; 215 uint64_t env_lock_file_nanos; 216 uint64_t env_unlock_file_nanos; 217 uint64_t env_new_logger_nanos; 218 219 uint64_t get_cpu_nanos; 220 uint64_t iter_next_cpu_nanos; 221 uint64_t iter_prev_cpu_nanos; 222 uint64_t iter_seek_cpu_nanos; 223 224 std::map<uint32_t, PerfContextByLevel>* level_to_perf_context = nullptr; 225 bool per_level_perf_context_enabled = false; 226 }; 227 228 // Get Thread-local PerfContext object pointer 229 // if defined(NPERF_CONTEXT), then the pointer is not thread-local 230 PerfContext* get_perf_context(); 231 232 } // namespace ROCKSDB_NAMESPACE 233