1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 
6 #ifndef ROCKSDB_LITE
7 #include "table/plain/plain_table_builder.h"
8 
9 #include <assert.h>
10 
11 #include <string>
12 #include <limits>
13 #include <map>
14 
15 #include "db/dbformat.h"
16 #include "file/writable_file_writer.h"
17 #include "rocksdb/comparator.h"
18 #include "rocksdb/env.h"
19 #include "rocksdb/filter_policy.h"
20 #include "rocksdb/options.h"
21 #include "rocksdb/table.h"
22 #include "table/block_based/block_builder.h"
23 #include "table/format.h"
24 #include "table/meta_blocks.h"
25 #include "table/plain/plain_table_bloom.h"
26 #include "table/plain/plain_table_factory.h"
27 #include "table/plain/plain_table_index.h"
28 #include "util/coding.h"
29 #include "util/crc32c.h"
30 #include "util/stop_watch.h"
31 
32 namespace ROCKSDB_NAMESPACE {
33 
34 namespace {
35 
36 // a utility that helps writing block content to the file
37 //   @offset will advance if @block_contents was successfully written.
38 //   @block_handle the block handle this particular block.
WriteBlock(const Slice & block_contents,WritableFileWriter * file,uint64_t * offset,BlockHandle * block_handle)39 IOStatus WriteBlock(const Slice& block_contents, WritableFileWriter* file,
40                     uint64_t* offset, BlockHandle* block_handle) {
41   block_handle->set_offset(*offset);
42   block_handle->set_size(block_contents.size());
43   IOStatus io_s = file->Append(block_contents);
44 
45   if (io_s.ok()) {
46     *offset += block_contents.size();
47   }
48   return io_s;
49 }
50 
51 }  // namespace
52 
// kPlainTableMagicNumber was picked by running
//    echo rocksdb.table.plain | sha1sum
// and taking the leading 64 bits.
extern const uint64_t kPlainTableMagicNumber = 0x8242229663bf9564ull;
// Magic number that PlainTableBuilder::Finish() passes to the Footer it
// writes (together with version 0).
extern const uint64_t kLegacyPlainTableMagicNumber = 0x4f3418eb7a8f13b8ull;
58 
PlainTableBuilder(const ImmutableCFOptions & ioptions,const MutableCFOptions & moptions,const std::vector<std::unique_ptr<IntTblPropCollectorFactory>> * int_tbl_prop_collector_factories,uint32_t column_family_id,WritableFileWriter * file,uint32_t user_key_len,EncodingType encoding_type,size_t index_sparseness,uint32_t bloom_bits_per_key,const std::string & column_family_name,uint32_t num_probes,size_t huge_page_tlb_size,double hash_table_ratio,bool store_index_in_file)59 PlainTableBuilder::PlainTableBuilder(
60     const ImmutableCFOptions& ioptions, const MutableCFOptions& moptions,
61     const std::vector<std::unique_ptr<IntTblPropCollectorFactory>>*
62         int_tbl_prop_collector_factories,
63     uint32_t column_family_id, WritableFileWriter* file, uint32_t user_key_len,
64     EncodingType encoding_type, size_t index_sparseness,
65     uint32_t bloom_bits_per_key, const std::string& column_family_name,
66     uint32_t num_probes, size_t huge_page_tlb_size, double hash_table_ratio,
67     bool store_index_in_file)
68     : ioptions_(ioptions),
69       moptions_(moptions),
70       bloom_block_(num_probes),
71       file_(file),
72       bloom_bits_per_key_(bloom_bits_per_key),
73       huge_page_tlb_size_(huge_page_tlb_size),
74       encoder_(encoding_type, user_key_len, moptions.prefix_extractor.get(),
75                index_sparseness),
76       store_index_in_file_(store_index_in_file),
77       prefix_extractor_(moptions.prefix_extractor.get()) {
78   // Build index block and save it in the file if hash_table_ratio > 0
79   if (store_index_in_file_) {
80     assert(hash_table_ratio > 0 || IsTotalOrderMode());
81     index_builder_.reset(new PlainTableIndexBuilder(
82         &arena_, ioptions, moptions.prefix_extractor.get(), index_sparseness,
83         hash_table_ratio, huge_page_tlb_size_));
84     properties_.user_collected_properties
85         [PlainTablePropertyNames::kBloomVersion] = "1";  // For future use
86   }
87 
88   properties_.fixed_key_len = user_key_len;
89 
90   // for plain table, we put all the data in a big chuck.
91   properties_.num_data_blocks = 1;
92   // Fill it later if store_index_in_file_ == true
93   properties_.index_size = 0;
94   properties_.filter_size = 0;
95   // To support roll-back to previous version, now still use version 0 for
96   // plain encoding.
97   properties_.format_version = (encoding_type == kPlain) ? 0 : 1;
98   properties_.column_family_id = column_family_id;
99   properties_.column_family_name = column_family_name;
100   properties_.prefix_extractor_name = moptions_.prefix_extractor != nullptr
101                                           ? moptions_.prefix_extractor->Name()
102                                           : "nullptr";
103 
104   std::string val;
105   PutFixed32(&val, static_cast<uint32_t>(encoder_.GetEncodingType()));
106   properties_.user_collected_properties
107       [PlainTablePropertyNames::kEncodingType] = val;
108 
109   for (auto& collector_factories : *int_tbl_prop_collector_factories) {
110     table_properties_collectors_.emplace_back(
111         collector_factories->CreateIntTblPropCollector(column_family_id));
112   }
113 }
114 
~PlainTableBuilder()115 PlainTableBuilder::~PlainTableBuilder() {
116 }
117 
Add(const Slice & key,const Slice & value)118 void PlainTableBuilder::Add(const Slice& key, const Slice& value) {
119   // temp buffer for metadata bytes between key and value.
120   char meta_bytes_buf[6];
121   size_t meta_bytes_buf_size = 0;
122 
123   ParsedInternalKey internal_key;
124   if (!ParseInternalKey(key, &internal_key)) {
125     assert(false);
126     return;
127   }
128   if (internal_key.type == kTypeRangeDeletion) {
129     status_ = Status::NotSupported("Range deletion unsupported");
130     return;
131   }
132 
133   // Store key hash
134   if (store_index_in_file_) {
135     if (moptions_.prefix_extractor == nullptr) {
136       keys_or_prefixes_hashes_.push_back(GetSliceHash(internal_key.user_key));
137     } else {
138       Slice prefix =
139           moptions_.prefix_extractor->Transform(internal_key.user_key);
140       keys_or_prefixes_hashes_.push_back(GetSliceHash(prefix));
141     }
142   }
143 
144   // Write value
145   assert(offset_ <= std::numeric_limits<uint32_t>::max());
146   auto prev_offset = static_cast<uint32_t>(offset_);
147   // Write out the key
148   io_status_ = encoder_.AppendKey(key, file_, &offset_, meta_bytes_buf,
149                                   &meta_bytes_buf_size);
150   if (SaveIndexInFile()) {
151     index_builder_->AddKeyPrefix(GetPrefix(internal_key), prev_offset);
152   }
153 
154   // Write value length
155   uint32_t value_size = static_cast<uint32_t>(value.size());
156   if (io_status_.ok()) {
157     char* end_ptr =
158         EncodeVarint32(meta_bytes_buf + meta_bytes_buf_size, value_size);
159     assert(end_ptr <= meta_bytes_buf + sizeof(meta_bytes_buf));
160     meta_bytes_buf_size = end_ptr - meta_bytes_buf;
161     io_status_ = file_->Append(Slice(meta_bytes_buf, meta_bytes_buf_size));
162   }
163 
164   // Write value
165   if (io_status_.ok()) {
166     io_status_ = file_->Append(value);
167     offset_ += value_size + meta_bytes_buf_size;
168   }
169 
170   if (io_status_.ok()) {
171     properties_.num_entries++;
172     properties_.raw_key_size += key.size();
173     properties_.raw_value_size += value.size();
174     if (internal_key.type == kTypeDeletion ||
175         internal_key.type == kTypeSingleDeletion) {
176       properties_.num_deletions++;
177     } else if (internal_key.type == kTypeMerge) {
178       properties_.num_merge_operands++;
179     }
180   }
181 
182   // notify property collectors
183   NotifyCollectTableCollectorsOnAdd(
184       key, value, offset_, table_properties_collectors_, ioptions_.info_log);
185   status_ = io_status_;
186 }
187 
Finish()188 Status PlainTableBuilder::Finish() {
189   assert(!closed_);
190   closed_ = true;
191 
192   properties_.data_size = offset_;
193 
194   //  Write the following blocks
195   //  1. [meta block: bloom] - optional
196   //  2. [meta block: index] - optional
197   //  3. [meta block: properties]
198   //  4. [metaindex block]
199   //  5. [footer]
200 
201   MetaIndexBuilder meta_index_builer;
202 
203   if (store_index_in_file_ && (properties_.num_entries > 0)) {
204     assert(properties_.num_entries <= std::numeric_limits<uint32_t>::max());
205     Status s;
206     BlockHandle bloom_block_handle;
207     if (bloom_bits_per_key_ > 0) {
208       bloom_block_.SetTotalBits(
209           &arena_,
210           static_cast<uint32_t>(properties_.num_entries) * bloom_bits_per_key_,
211           ioptions_.bloom_locality, huge_page_tlb_size_, ioptions_.info_log);
212 
213       PutVarint32(&properties_.user_collected_properties
214                        [PlainTablePropertyNames::kNumBloomBlocks],
215                   bloom_block_.GetNumBlocks());
216 
217       bloom_block_.AddKeysHashes(keys_or_prefixes_hashes_);
218 
219       Slice bloom_finish_result = bloom_block_.Finish();
220 
221       properties_.filter_size = bloom_finish_result.size();
222       io_status_ =
223           WriteBlock(bloom_finish_result, file_, &offset_, &bloom_block_handle);
224 
225       if (!io_status_.ok()) {
226         status_ = io_status_;
227         return status_;
228       }
229       meta_index_builer.Add(BloomBlockBuilder::kBloomBlock, bloom_block_handle);
230     }
231     BlockHandle index_block_handle;
232     Slice index_finish_result = index_builder_->Finish();
233 
234     properties_.index_size = index_finish_result.size();
235     io_status_ =
236         WriteBlock(index_finish_result, file_, &offset_, &index_block_handle);
237 
238     if (!io_status_.ok()) {
239       status_ = io_status_;
240       return status_;
241     }
242 
243     meta_index_builer.Add(PlainTableIndexBuilder::kPlainTableIndexBlock,
244                           index_block_handle);
245   }
246 
247   // Calculate bloom block size and index block size
248   PropertyBlockBuilder property_block_builder;
249   // -- Add basic properties
250   property_block_builder.AddTableProperty(properties_);
251 
252   property_block_builder.Add(properties_.user_collected_properties);
253 
254   // -- Add user collected properties
255   NotifyCollectTableCollectorsOnFinish(table_properties_collectors_,
256                                        ioptions_.info_log,
257                                        &property_block_builder);
258 
259   // -- Write property block
260   BlockHandle property_block_handle;
261   IOStatus s = WriteBlock(property_block_builder.Finish(), file_, &offset_,
262                           &property_block_handle);
263   if (!s.ok()) {
264     return std::move(s);
265   }
266   meta_index_builer.Add(kPropertiesBlock, property_block_handle);
267 
268   // -- write metaindex block
269   BlockHandle metaindex_block_handle;
270   io_status_ = WriteBlock(meta_index_builer.Finish(), file_, &offset_,
271                           &metaindex_block_handle);
272   if (!io_status_.ok()) {
273     status_ = io_status_;
274     return status_;
275   }
276 
277   // Write Footer
278   // no need to write out new footer if we're using default checksum
279   Footer footer(kLegacyPlainTableMagicNumber, 0);
280   footer.set_metaindex_handle(metaindex_block_handle);
281   footer.set_index_handle(BlockHandle::NullBlockHandle());
282   std::string footer_encoding;
283   footer.EncodeTo(&footer_encoding);
284   io_status_ = file_->Append(footer_encoding);
285   if (io_status_.ok()) {
286     offset_ += footer_encoding.size();
287   }
288   status_ = io_status_;
289   return status_;
290 }
291 
Abandon()292 void PlainTableBuilder::Abandon() {
293   closed_ = true;
294 }
295 
NumEntries() const296 uint64_t PlainTableBuilder::NumEntries() const {
297   return properties_.num_entries;
298 }
299 
FileSize() const300 uint64_t PlainTableBuilder::FileSize() const {
301   return offset_;
302 }
303 
GetFileChecksum() const304 std::string PlainTableBuilder::GetFileChecksum() const {
305   if (file_ != nullptr) {
306     return file_->GetFileChecksum();
307   } else {
308     return kUnknownFileChecksum;
309   }
310 }
311 
GetFileChecksumFuncName() const312 const char* PlainTableBuilder::GetFileChecksumFuncName() const {
313   if (file_ != nullptr) {
314     return file_->GetFileChecksumFuncName();
315   } else {
316     return kUnknownFileChecksumFuncName.c_str();
317   }
318 }
319 
320 }  // namespace ROCKSDB_NAMESPACE
321 #endif  // ROCKSDB_LITE
322