1 //  Copyright (c) 2011-present, Facebook, Inc.  All rights reserved.
2 //  This source code is licensed under both the GPLv2 (found in the
3 //  COPYING file in the root directory) and Apache 2.0 License
4 //  (found in the LICENSE.Apache file in the root directory).
5 //
6 // Copyright (c) 2011 The LevelDB Authors. All rights reserved.
7 // Use of this source code is governed by a BSD-style license that can be
8 // found in the LICENSE file. See the AUTHORS file for names of contributors.
9 //
10 // Endian-neutral encoding:
11 // * Fixed-length numbers are encoded with least-significant byte first
12 // * In addition we support variable length "varint" encoding
13 // * Strings are encoded prefixed by their length in varint format
14 
15 #pragma once
16 #include <algorithm>
17 #include <stdint.h>
18 #include <string.h>
19 #include <string>
20 
21 #include "rocksdb/write_batch.h"
22 #include "port/port.h"
23 
24 // Some processors does not allow unaligned access to memory
25 #if defined(__sparc)
26   #define PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED
27 #endif
28 
29 namespace ROCKSDB_NAMESPACE {
30 
31 // The maximum length of a varint in bytes for 64-bit.
32 const unsigned int kMaxVarint64Length = 10;
33 
34 // Standard Put... routines append to a string
35 extern void PutFixed16(std::string* dst, uint16_t value);
36 extern void PutFixed32(std::string* dst, uint32_t value);
37 extern void PutFixed64(std::string* dst, uint64_t value);
38 extern void PutVarint32(std::string* dst, uint32_t value);
39 extern void PutVarint32Varint32(std::string* dst, uint32_t value1,
40                                 uint32_t value2);
41 extern void PutVarint32Varint32Varint32(std::string* dst, uint32_t value1,
42                                         uint32_t value2, uint32_t value3);
43 extern void PutVarint64(std::string* dst, uint64_t value);
44 extern void PutVarint64Varint64(std::string* dst, uint64_t value1,
45                                 uint64_t value2);
46 extern void PutVarint32Varint64(std::string* dst, uint32_t value1,
47                                 uint64_t value2);
48 extern void PutVarint32Varint32Varint64(std::string* dst, uint32_t value1,
49                                         uint32_t value2, uint64_t value3);
50 extern void PutLengthPrefixedSlice(std::string* dst, const Slice& value);
51 extern void PutLengthPrefixedSliceParts(std::string* dst,
52                                         const SliceParts& slice_parts);
53 extern void PutLengthPrefixedSlicePartsWithPadding(
54     std::string* dst, const SliceParts& slice_parts, size_t pad_sz);
55 
56 // Standard Get... routines parse a value from the beginning of a Slice
57 // and advance the slice past the parsed value.
58 extern bool GetFixed64(Slice* input, uint64_t* value);
59 extern bool GetFixed32(Slice* input, uint32_t* value);
60 extern bool GetFixed16(Slice* input, uint16_t* value);
61 extern bool GetVarint32(Slice* input, uint32_t* value);
62 extern bool GetVarint64(Slice* input, uint64_t* value);
63 extern bool GetVarsignedint64(Slice* input, int64_t* value);
64 extern bool GetLengthPrefixedSlice(Slice* input, Slice* result);
65 // This function assumes data is well-formed.
66 extern Slice GetLengthPrefixedSlice(const char* data);
67 
68 extern Slice GetSliceUntil(Slice* slice, char delimiter);
69 
70 // Borrowed from
71 // https://github.com/facebook/fbthrift/blob/449a5f77f9f9bae72c9eb5e78093247eef185c04/thrift/lib/cpp/util/VarintUtils-inl.h#L202-L208
i64ToZigzag(const int64_t l)72 constexpr inline uint64_t i64ToZigzag(const int64_t l) {
73   return (static_cast<uint64_t>(l) << 1) ^ static_cast<uint64_t>(l >> 63);
74 }
zigzagToI64(uint64_t n)75 inline int64_t zigzagToI64(uint64_t n) {
76   return (n >> 1) ^ -static_cast<int64_t>(n & 1);
77 }
78 
79 // Pointer-based variants of GetVarint...  These either store a value
80 // in *v and return a pointer just past the parsed value, or return
81 // nullptr on error.  These routines only look at bytes in the range
82 // [p..limit-1]
83 extern const char* GetVarint32Ptr(const char* p,const char* limit, uint32_t* v);
84 extern const char* GetVarint64Ptr(const char* p,const char* limit, uint64_t* v);
GetVarsignedint64Ptr(const char * p,const char * limit,int64_t * value)85 inline const char* GetVarsignedint64Ptr(const char* p, const char* limit,
86                                         int64_t* value) {
87   uint64_t u = 0;
88   const char* ret = GetVarint64Ptr(p, limit, &u);
89   *value = zigzagToI64(u);
90   return ret;
91 }
92 
93 // Returns the length of the varint32 or varint64 encoding of "v"
94 extern int VarintLength(uint64_t v);
95 
96 // Lower-level versions of Put... that write directly into a character buffer
97 // REQUIRES: dst has enough space for the value being written
98 extern void EncodeFixed16(char* dst, uint16_t value);
99 extern void EncodeFixed32(char* dst, uint32_t value);
100 extern void EncodeFixed64(char* dst, uint64_t value);
101 
102 // Lower-level versions of Put... that write directly into a character buffer
103 // and return a pointer just past the last byte written.
104 // REQUIRES: dst has enough space for the value being written
105 extern char* EncodeVarint32(char* dst, uint32_t value);
106 extern char* EncodeVarint64(char* dst, uint64_t value);
107 
108 // Lower-level versions of Get... that read directly from a character buffer
109 // without any bounds checking.
110 
DecodeFixed16(const char * ptr)111 inline uint16_t DecodeFixed16(const char* ptr) {
112   if (port::kLittleEndian) {
113     // Load the raw bytes
114     uint16_t result;
115     memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
116     return result;
117   } else {
118     return ((static_cast<uint16_t>(static_cast<unsigned char>(ptr[0]))) |
119             (static_cast<uint16_t>(static_cast<unsigned char>(ptr[1])) << 8));
120   }
121 }
122 
DecodeFixed32(const char * ptr)123 inline uint32_t DecodeFixed32(const char* ptr) {
124   if (port::kLittleEndian) {
125     // Load the raw bytes
126     uint32_t result;
127     memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
128     return result;
129   } else {
130     return ((static_cast<uint32_t>(static_cast<unsigned char>(ptr[0])))
131         | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[1])) << 8)
132         | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[2])) << 16)
133         | (static_cast<uint32_t>(static_cast<unsigned char>(ptr[3])) << 24));
134   }
135 }
136 
DecodeFixed64(const char * ptr)137 inline uint64_t DecodeFixed64(const char* ptr) {
138   if (port::kLittleEndian) {
139     // Load the raw bytes
140     uint64_t result;
141     memcpy(&result, ptr, sizeof(result));  // gcc optimizes this to a plain load
142     return result;
143   } else {
144     uint64_t lo = DecodeFixed32(ptr);
145     uint64_t hi = DecodeFixed32(ptr + 4);
146     return (hi << 32) | lo;
147   }
148 }
149 
150 // Internal routine for use by fallback path of GetVarint32Ptr
151 extern const char* GetVarint32PtrFallback(const char* p,
152                                           const char* limit,
153                                           uint32_t* value);
GetVarint32Ptr(const char * p,const char * limit,uint32_t * value)154 inline const char* GetVarint32Ptr(const char* p,
155                                   const char* limit,
156                                   uint32_t* value) {
157   if (p < limit) {
158     uint32_t result = *(reinterpret_cast<const unsigned char*>(p));
159     if ((result & 128) == 0) {
160       *value = result;
161       return p + 1;
162     }
163   }
164   return GetVarint32PtrFallback(p, limit, value);
165 }
166 
167 // -- Implementation of the functions declared above
EncodeFixed16(char * buf,uint16_t value)168 inline void EncodeFixed16(char* buf, uint16_t value) {
169   if (port::kLittleEndian) {
170     memcpy(buf, &value, sizeof(value));
171   } else {
172     buf[0] = value & 0xff;
173     buf[1] = (value >> 8) & 0xff;
174   }
175 }
176 
EncodeFixed32(char * buf,uint32_t value)177 inline void EncodeFixed32(char* buf, uint32_t value) {
178   if (port::kLittleEndian) {
179     memcpy(buf, &value, sizeof(value));
180   } else {
181     buf[0] = value & 0xff;
182     buf[1] = (value >> 8) & 0xff;
183     buf[2] = (value >> 16) & 0xff;
184     buf[3] = (value >> 24) & 0xff;
185   }
186 }
187 
EncodeFixed64(char * buf,uint64_t value)188 inline void EncodeFixed64(char* buf, uint64_t value) {
189   if (port::kLittleEndian) {
190     memcpy(buf, &value, sizeof(value));
191   } else {
192     buf[0] = value & 0xff;
193     buf[1] = (value >> 8) & 0xff;
194     buf[2] = (value >> 16) & 0xff;
195     buf[3] = (value >> 24) & 0xff;
196     buf[4] = (value >> 32) & 0xff;
197     buf[5] = (value >> 40) & 0xff;
198     buf[6] = (value >> 48) & 0xff;
199     buf[7] = (value >> 56) & 0xff;
200   }
201 }
202 
203 // Pull the last 8 bits and cast it to a character
PutFixed16(std::string * dst,uint16_t value)204 inline void PutFixed16(std::string* dst, uint16_t value) {
205   if (port::kLittleEndian) {
206     dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
207                 sizeof(value));
208   } else {
209     char buf[sizeof(value)];
210     EncodeFixed16(buf, value);
211     dst->append(buf, sizeof(buf));
212   }
213 }
214 
PutFixed32(std::string * dst,uint32_t value)215 inline void PutFixed32(std::string* dst, uint32_t value) {
216   if (port::kLittleEndian) {
217     dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
218       sizeof(value));
219   } else {
220     char buf[sizeof(value)];
221     EncodeFixed32(buf, value);
222     dst->append(buf, sizeof(buf));
223   }
224 }
225 
PutFixed64(std::string * dst,uint64_t value)226 inline void PutFixed64(std::string* dst, uint64_t value) {
227   if (port::kLittleEndian) {
228     dst->append(const_cast<const char*>(reinterpret_cast<char*>(&value)),
229       sizeof(value));
230   } else {
231     char buf[sizeof(value)];
232     EncodeFixed64(buf, value);
233     dst->append(buf, sizeof(buf));
234   }
235 }
236 
PutVarint32(std::string * dst,uint32_t v)237 inline void PutVarint32(std::string* dst, uint32_t v) {
238   char buf[5];
239   char* ptr = EncodeVarint32(buf, v);
240   dst->append(buf, static_cast<size_t>(ptr - buf));
241 }
242 
PutVarint32Varint32(std::string * dst,uint32_t v1,uint32_t v2)243 inline void PutVarint32Varint32(std::string* dst, uint32_t v1, uint32_t v2) {
244   char buf[10];
245   char* ptr = EncodeVarint32(buf, v1);
246   ptr = EncodeVarint32(ptr, v2);
247   dst->append(buf, static_cast<size_t>(ptr - buf));
248 }
249 
PutVarint32Varint32Varint32(std::string * dst,uint32_t v1,uint32_t v2,uint32_t v3)250 inline void PutVarint32Varint32Varint32(std::string* dst, uint32_t v1,
251                                         uint32_t v2, uint32_t v3) {
252   char buf[15];
253   char* ptr = EncodeVarint32(buf, v1);
254   ptr = EncodeVarint32(ptr, v2);
255   ptr = EncodeVarint32(ptr, v3);
256   dst->append(buf, static_cast<size_t>(ptr - buf));
257 }
258 
EncodeVarint64(char * dst,uint64_t v)259 inline char* EncodeVarint64(char* dst, uint64_t v) {
260   static const unsigned int B = 128;
261   unsigned char* ptr = reinterpret_cast<unsigned char*>(dst);
262   while (v >= B) {
263     *(ptr++) = (v & (B - 1)) | B;
264     v >>= 7;
265   }
266   *(ptr++) = static_cast<unsigned char>(v);
267   return reinterpret_cast<char*>(ptr);
268 }
269 
PutVarint64(std::string * dst,uint64_t v)270 inline void PutVarint64(std::string* dst, uint64_t v) {
271   char buf[kMaxVarint64Length];
272   char* ptr = EncodeVarint64(buf, v);
273   dst->append(buf, static_cast<size_t>(ptr - buf));
274 }
275 
PutVarsignedint64(std::string * dst,int64_t v)276 inline void PutVarsignedint64(std::string* dst, int64_t v) {
277   char buf[kMaxVarint64Length];
278   // Using Zigzag format to convert signed to unsigned
279   char* ptr = EncodeVarint64(buf, i64ToZigzag(v));
280   dst->append(buf, static_cast<size_t>(ptr - buf));
281 }
282 
PutVarint64Varint64(std::string * dst,uint64_t v1,uint64_t v2)283 inline void PutVarint64Varint64(std::string* dst, uint64_t v1, uint64_t v2) {
284   char buf[20];
285   char* ptr = EncodeVarint64(buf, v1);
286   ptr = EncodeVarint64(ptr, v2);
287   dst->append(buf, static_cast<size_t>(ptr - buf));
288 }
289 
PutVarint32Varint64(std::string * dst,uint32_t v1,uint64_t v2)290 inline void PutVarint32Varint64(std::string* dst, uint32_t v1, uint64_t v2) {
291   char buf[15];
292   char* ptr = EncodeVarint32(buf, v1);
293   ptr = EncodeVarint64(ptr, v2);
294   dst->append(buf, static_cast<size_t>(ptr - buf));
295 }
296 
PutVarint32Varint32Varint64(std::string * dst,uint32_t v1,uint32_t v2,uint64_t v3)297 inline void PutVarint32Varint32Varint64(std::string* dst, uint32_t v1,
298                                         uint32_t v2, uint64_t v3) {
299   char buf[20];
300   char* ptr = EncodeVarint32(buf, v1);
301   ptr = EncodeVarint32(ptr, v2);
302   ptr = EncodeVarint64(ptr, v3);
303   dst->append(buf, static_cast<size_t>(ptr - buf));
304 }
305 
PutLengthPrefixedSlice(std::string * dst,const Slice & value)306 inline void PutLengthPrefixedSlice(std::string* dst, const Slice& value) {
307   PutVarint32(dst, static_cast<uint32_t>(value.size()));
308   dst->append(value.data(), value.size());
309 }
310 
PutLengthPrefixedSliceParts(std::string * dst,size_t total_bytes,const SliceParts & slice_parts)311 inline void PutLengthPrefixedSliceParts(std::string* dst, size_t total_bytes,
312                                         const SliceParts& slice_parts) {
313   for (int i = 0; i < slice_parts.num_parts; ++i) {
314     total_bytes += slice_parts.parts[i].size();
315   }
316   PutVarint32(dst, static_cast<uint32_t>(total_bytes));
317   for (int i = 0; i < slice_parts.num_parts; ++i) {
318     dst->append(slice_parts.parts[i].data(), slice_parts.parts[i].size());
319   }
320 }
321 
PutLengthPrefixedSliceParts(std::string * dst,const SliceParts & slice_parts)322 inline void PutLengthPrefixedSliceParts(std::string* dst,
323                                         const SliceParts& slice_parts) {
324   PutLengthPrefixedSliceParts(dst, /*total_bytes=*/0, slice_parts);
325 }
326 
PutLengthPrefixedSlicePartsWithPadding(std::string * dst,const SliceParts & slice_parts,size_t pad_sz)327 inline void PutLengthPrefixedSlicePartsWithPadding(
328     std::string* dst, const SliceParts& slice_parts, size_t pad_sz) {
329   PutLengthPrefixedSliceParts(dst, /*total_bytes=*/pad_sz, slice_parts);
330   dst->append(pad_sz, '\0');
331 }
332 
VarintLength(uint64_t v)333 inline int VarintLength(uint64_t v) {
334   int len = 1;
335   while (v >= 128) {
336     v >>= 7;
337     len++;
338   }
339   return len;
340 }
341 
GetFixed64(Slice * input,uint64_t * value)342 inline bool GetFixed64(Slice* input, uint64_t* value) {
343   if (input->size() < sizeof(uint64_t)) {
344     return false;
345   }
346   *value = DecodeFixed64(input->data());
347   input->remove_prefix(sizeof(uint64_t));
348   return true;
349 }
350 
GetFixed32(Slice * input,uint32_t * value)351 inline bool GetFixed32(Slice* input, uint32_t* value) {
352   if (input->size() < sizeof(uint32_t)) {
353     return false;
354   }
355   *value = DecodeFixed32(input->data());
356   input->remove_prefix(sizeof(uint32_t));
357   return true;
358 }
359 
GetFixed16(Slice * input,uint16_t * value)360 inline bool GetFixed16(Slice* input, uint16_t* value) {
361   if (input->size() < sizeof(uint16_t)) {
362     return false;
363   }
364   *value = DecodeFixed16(input->data());
365   input->remove_prefix(sizeof(uint16_t));
366   return true;
367 }
368 
GetVarint32(Slice * input,uint32_t * value)369 inline bool GetVarint32(Slice* input, uint32_t* value) {
370   const char* p = input->data();
371   const char* limit = p + input->size();
372   const char* q = GetVarint32Ptr(p, limit, value);
373   if (q == nullptr) {
374     return false;
375   } else {
376     *input = Slice(q, static_cast<size_t>(limit - q));
377     return true;
378   }
379 }
380 
GetVarint64(Slice * input,uint64_t * value)381 inline bool GetVarint64(Slice* input, uint64_t* value) {
382   const char* p = input->data();
383   const char* limit = p + input->size();
384   const char* q = GetVarint64Ptr(p, limit, value);
385   if (q == nullptr) {
386     return false;
387   } else {
388     *input = Slice(q, static_cast<size_t>(limit - q));
389     return true;
390   }
391 }
392 
GetVarsignedint64(Slice * input,int64_t * value)393 inline bool GetVarsignedint64(Slice* input, int64_t* value) {
394   const char* p = input->data();
395   const char* limit = p + input->size();
396   const char* q = GetVarsignedint64Ptr(p, limit, value);
397   if (q == nullptr) {
398     return false;
399   } else {
400     *input = Slice(q, static_cast<size_t>(limit - q));
401     return true;
402   }
403 }
404 
405 // Provide an interface for platform independent endianness transformation
EndianTransform(uint64_t input,size_t size)406 inline uint64_t EndianTransform(uint64_t input, size_t size) {
407   char* pos = reinterpret_cast<char*>(&input);
408   uint64_t ret_val = 0;
409   for (size_t i = 0; i < size; ++i) {
410     ret_val |= (static_cast<uint64_t>(static_cast<unsigned char>(pos[i]))
411                 << ((size - i - 1) << 3));
412   }
413   return ret_val;
414 }
415 
GetLengthPrefixedSlice(Slice * input,Slice * result)416 inline bool GetLengthPrefixedSlice(Slice* input, Slice* result) {
417   uint32_t len = 0;
418   if (GetVarint32(input, &len) && input->size() >= len) {
419     *result = Slice(input->data(), len);
420     input->remove_prefix(len);
421     return true;
422   } else {
423     return false;
424   }
425 }
426 
GetLengthPrefixedSlice(const char * data)427 inline Slice GetLengthPrefixedSlice(const char* data) {
428   uint32_t len = 0;
429   // +5: we assume "data" is not corrupted
430   // unsigned char is 7 bits, uint32_t is 32 bits, need 5 unsigned char
431   auto p = GetVarint32Ptr(data, data + 5 /* limit */, &len);
432   return Slice(p, len);
433 }
434 
GetSliceUntil(Slice * slice,char delimiter)435 inline Slice GetSliceUntil(Slice* slice, char delimiter) {
436   uint32_t len = 0;
437   for (len = 0; len < slice->size() && slice->data()[len] != delimiter; ++len) {
438     // nothing
439   }
440 
441   Slice ret(slice->data(), len);
442   slice->remove_prefix(len + ((len < slice->size()) ? 1 : 0));
443   return ret;
444 }
445 
446 template<class T>
447 #ifdef ROCKSDB_UBSAN_RUN
448 #if defined(__clang__)
449 __attribute__((__no_sanitize__("alignment")))
450 #elif defined(__GNUC__)
451 __attribute__((__no_sanitize_undefined__))
452 #endif
453 #endif
PutUnaligned(T * memory,const T & value)454 inline void PutUnaligned(T *memory, const T &value) {
455 #if defined(PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED)
456   char *nonAlignedMemory = reinterpret_cast<char*>(memory);
457   memcpy(nonAlignedMemory, reinterpret_cast<const char*>(&value), sizeof(T));
458 #else
459   *memory = value;
460 #endif
461 }
462 
463 template<class T>
464 #ifdef ROCKSDB_UBSAN_RUN
465 #if defined(__clang__)
466 __attribute__((__no_sanitize__("alignment")))
467 #elif defined(__GNUC__)
468 __attribute__((__no_sanitize_undefined__))
469 #endif
470 #endif
GetUnaligned(const T * memory,T * value)471 inline void GetUnaligned(const T *memory, T *value) {
472 #if defined(PLATFORM_UNALIGNED_ACCESS_NOT_ALLOWED)
473   char *nonAlignedMemory = reinterpret_cast<char*>(value);
474   memcpy(nonAlignedMemory, reinterpret_cast<const char*>(memory), sizeof(T));
475 #else
476   *value = *memory;
477 #endif
478 }
479 
480 }  // namespace ROCKSDB_NAMESPACE
481