1 /* Listpack -- A lists of strings serialization format
2 *
3 * This file implements the specification you can find at:
4 *
5 * https://github.com/antirez/listpack
6 *
7 * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
8 * All rights reserved.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions are met:
12 *
13 * * Redistributions of source code must retain the above copyright notice,
14 * this list of conditions and the following disclaimer.
15 * * Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * * Neither the name of Redis nor the names of its contributors may be used
19 * to endorse or promote products derived from this software without
20 * specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32 * POSSIBILITY OF SUCH DAMAGE.
33 */
34
35 #include <stdint.h>
36 #include <limits.h>
37 #include <sys/types.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <stdio.h>
41
42 #include "listpack.h"
43 #include "listpack_malloc.h"
44
45 #define LP_HDR_SIZE 6 /* 32 bit total len + 16 bit number of elements. */
46 #define LP_HDR_NUMELE_UNKNOWN UINT16_MAX
47 #define LP_MAX_INT_ENCODING_LEN 9
48 #define LP_MAX_BACKLEN_SIZE 5
49 #define LP_MAX_ENTRY_BACKLEN 34359738367ULL
50 #define LP_ENCODING_INT 0
51 #define LP_ENCODING_STRING 1
52
53 #define LP_ENCODING_7BIT_UINT 0
54 #define LP_ENCODING_7BIT_UINT_MASK 0x80
55 #define LP_ENCODING_IS_7BIT_UINT(byte) (((byte)&LP_ENCODING_7BIT_UINT_MASK)==LP_ENCODING_7BIT_UINT)
56
57 #define LP_ENCODING_6BIT_STR 0x80
58 #define LP_ENCODING_6BIT_STR_MASK 0xC0
59 #define LP_ENCODING_IS_6BIT_STR(byte) (((byte)&LP_ENCODING_6BIT_STR_MASK)==LP_ENCODING_6BIT_STR)
60
61 #define LP_ENCODING_13BIT_INT 0xC0
62 #define LP_ENCODING_13BIT_INT_MASK 0xE0
63 #define LP_ENCODING_IS_13BIT_INT(byte) (((byte)&LP_ENCODING_13BIT_INT_MASK)==LP_ENCODING_13BIT_INT)
64
65 #define LP_ENCODING_12BIT_STR 0xE0
66 #define LP_ENCODING_12BIT_STR_MASK 0xF0
67 #define LP_ENCODING_IS_12BIT_STR(byte) (((byte)&LP_ENCODING_12BIT_STR_MASK)==LP_ENCODING_12BIT_STR)
68
69 #define LP_ENCODING_16BIT_INT 0xF1
70 #define LP_ENCODING_16BIT_INT_MASK 0xFF
71 #define LP_ENCODING_IS_16BIT_INT(byte) (((byte)&LP_ENCODING_16BIT_INT_MASK)==LP_ENCODING_16BIT_INT)
72
73 #define LP_ENCODING_24BIT_INT 0xF2
74 #define LP_ENCODING_24BIT_INT_MASK 0xFF
75 #define LP_ENCODING_IS_24BIT_INT(byte) (((byte)&LP_ENCODING_24BIT_INT_MASK)==LP_ENCODING_24BIT_INT)
76
77 #define LP_ENCODING_32BIT_INT 0xF3
78 #define LP_ENCODING_32BIT_INT_MASK 0xFF
79 #define LP_ENCODING_IS_32BIT_INT(byte) (((byte)&LP_ENCODING_32BIT_INT_MASK)==LP_ENCODING_32BIT_INT)
80
81 #define LP_ENCODING_64BIT_INT 0xF4
82 #define LP_ENCODING_64BIT_INT_MASK 0xFF
83 #define LP_ENCODING_IS_64BIT_INT(byte) (((byte)&LP_ENCODING_64BIT_INT_MASK)==LP_ENCODING_64BIT_INT)
84
85 #define LP_ENCODING_32BIT_STR 0xF0
86 #define LP_ENCODING_32BIT_STR_MASK 0xFF
87 #define LP_ENCODING_IS_32BIT_STR(byte) (((byte)&LP_ENCODING_32BIT_STR_MASK)==LP_ENCODING_32BIT_STR)
88
89 #define LP_EOF 0xFF
90
91 #define LP_ENCODING_6BIT_STR_LEN(p) ((p)[0] & 0x3F)
92 #define LP_ENCODING_12BIT_STR_LEN(p) ((((p)[0] & 0xF) << 8) | (p)[1])
93 #define LP_ENCODING_32BIT_STR_LEN(p) (((uint32_t)(p)[1]<<0) | \
94 ((uint32_t)(p)[2]<<8) | \
95 ((uint32_t)(p)[3]<<16) | \
96 ((uint32_t)(p)[4]<<24))
97
98 #define lpGetTotalBytes(p) (((uint32_t)(p)[0]<<0) | \
99 ((uint32_t)(p)[1]<<8) | \
100 ((uint32_t)(p)[2]<<16) | \
101 ((uint32_t)(p)[3]<<24))
102
103 #define lpGetNumElements(p) (((uint32_t)(p)[4]<<0) | \
104 ((uint32_t)(p)[5]<<8))
105 #define lpSetTotalBytes(p,v) do { \
106 (p)[0] = (v)&0xff; \
107 (p)[1] = ((v)>>8)&0xff; \
108 (p)[2] = ((v)>>16)&0xff; \
109 (p)[3] = ((v)>>24)&0xff; \
110 } while(0)
111
112 #define lpSetNumElements(p,v) do { \
113 (p)[4] = (v)&0xff; \
114 (p)[5] = ((v)>>8)&0xff; \
115 } while(0)
116
117 /* Convert a string into a signed 64 bit integer.
118 * The function returns 1 if the string could be parsed into a (non-overflowing)
119 * signed 64 bit int, 0 otherwise. The 'value' will be set to the parsed value
120 * when the function returns success.
121 *
122 * Note that this function demands that the string strictly represents
123 * a int64 value: no spaces or other characters before or after the string
124 * representing the number are accepted, nor zeroes at the start if not
125 * for the string "0" representing the zero number.
126 *
127 * Because of its strictness, it is safe to use this function to check if
128 * you can convert a string into a long long, and obtain back the string
129 * from the number without any loss in the string representation. *
130 *
131 * -----------------------------------------------------------------------------
132 *
133 * Credits: this function was adapted from the Redis source code, file
134 * "utils.c", function string2ll(), and is copyright:
135 *
136 * Copyright(C) 2011, Pieter Noordhuis
137 * Copyright(C) 2011, Salvatore Sanfilippo
138 *
139 * The function is released under the BSD 3-clause license.
140 */
lpStringToInt64(const char * s,unsigned long slen,int64_t * value)141 int lpStringToInt64(const char *s, unsigned long slen, int64_t *value) {
142 const char *p = s;
143 unsigned long plen = 0;
144 int negative = 0;
145 uint64_t v;
146
147 if (plen == slen)
148 return 0;
149
150 /* Special case: first and only digit is 0. */
151 if (slen == 1 && p[0] == '0') {
152 if (value != NULL) *value = 0;
153 return 1;
154 }
155
156 if (p[0] == '-') {
157 negative = 1;
158 p++; plen++;
159
160 /* Abort on only a negative sign. */
161 if (plen == slen)
162 return 0;
163 }
164
165 /* First digit should be 1-9, otherwise the string should just be 0. */
166 if (p[0] >= '1' && p[0] <= '9') {
167 v = p[0]-'0';
168 p++; plen++;
169 } else if (p[0] == '0' && slen == 1) {
170 *value = 0;
171 return 1;
172 } else {
173 return 0;
174 }
175
176 while (plen < slen && p[0] >= '0' && p[0] <= '9') {
177 if (v > (UINT64_MAX / 10)) /* Overflow. */
178 return 0;
179 v *= 10;
180
181 if (v > (UINT64_MAX - (p[0]-'0'))) /* Overflow. */
182 return 0;
183 v += p[0]-'0';
184
185 p++; plen++;
186 }
187
188 /* Return if not all bytes were used. */
189 if (plen < slen)
190 return 0;
191
192 if (negative) {
193 if (v > ((uint64_t)(-(INT64_MIN+1))+1)) /* Overflow. */
194 return 0;
195 if (value != NULL) *value = -v;
196 } else {
197 if (v > INT64_MAX) /* Overflow. */
198 return 0;
199 if (value != NULL) *value = v;
200 }
201 return 1;
202 }
203
204 /* Create a new, empty listpack.
205 * On success the new listpack is returned, otherwise an error is returned. */
lpNew(void)206 unsigned char *lpNew(void) {
207 unsigned char *lp = lp_malloc(LP_HDR_SIZE+1);
208 if (lp == NULL) return NULL;
209 lpSetTotalBytes(lp,LP_HDR_SIZE+1);
210 lpSetNumElements(lp,0);
211 lp[LP_HDR_SIZE] = LP_EOF;
212 return lp;
213 }
214
215 /* Free the specified listpack. */
lpFree(unsigned char * lp)216 void lpFree(unsigned char *lp) {
217 lp_free(lp);
218 }
219
220 /* Given an element 'ele' of size 'size', determine if the element can be
221 * represented inside the listpack encoded as integer, and returns
222 * LP_ENCODING_INT if so. Otherwise returns LP_ENCODING_STR if no integer
223 * encoding is possible.
224 *
225 * If the LP_ENCODING_INT is returned, the function stores the integer encoded
226 * representation of the element in the 'intenc' buffer.
227 *
228 * Regardless of the returned encoding, 'enclen' is populated by reference to
229 * the number of bytes that the string or integer encoded element will require
230 * in order to be represented. */
lpEncodeGetType(unsigned char * ele,uint32_t size,unsigned char * intenc,uint64_t * enclen)231 int lpEncodeGetType(unsigned char *ele, uint32_t size, unsigned char *intenc, uint64_t *enclen) {
232 int64_t v;
233 if (lpStringToInt64((const char*)ele, size, &v)) {
234 if (v >= 0 && v <= 127) {
235 /* Single byte 0-127 integer. */
236 intenc[0] = v;
237 *enclen = 1;
238 } else if (v >= -4096 && v <= 4095) {
239 /* 13 bit integer. */
240 if (v < 0) v = ((int64_t)1<<13)+v;
241 intenc[0] = (v>>8)|LP_ENCODING_13BIT_INT;
242 intenc[1] = v&0xff;
243 *enclen = 2;
244 } else if (v >= -32768 && v <= 32767) {
245 /* 16 bit integer. */
246 if (v < 0) v = ((int64_t)1<<16)+v;
247 intenc[0] = LP_ENCODING_16BIT_INT;
248 intenc[1] = v&0xff;
249 intenc[2] = v>>8;
250 *enclen = 3;
251 } else if (v >= -8388608 && v <= 8388607) {
252 /* 24 bit integer. */
253 if (v < 0) v = ((int64_t)1<<24)+v;
254 intenc[0] = LP_ENCODING_24BIT_INT;
255 intenc[1] = v&0xff;
256 intenc[2] = (v>>8)&0xff;
257 intenc[3] = v>>16;
258 *enclen = 4;
259 } else if (v >= -2147483648 && v <= 2147483647) {
260 /* 32 bit integer. */
261 if (v < 0) v = ((int64_t)1<<32)+v;
262 intenc[0] = LP_ENCODING_32BIT_INT;
263 intenc[1] = v&0xff;
264 intenc[2] = (v>>8)&0xff;
265 intenc[3] = (v>>16)&0xff;
266 intenc[4] = v>>24;
267 *enclen = 5;
268 } else {
269 /* 64 bit integer. */
270 uint64_t uv = v;
271 intenc[0] = LP_ENCODING_64BIT_INT;
272 intenc[1] = uv&0xff;
273 intenc[2] = (uv>>8)&0xff;
274 intenc[3] = (uv>>16)&0xff;
275 intenc[4] = (uv>>24)&0xff;
276 intenc[5] = (uv>>32)&0xff;
277 intenc[6] = (uv>>40)&0xff;
278 intenc[7] = (uv>>48)&0xff;
279 intenc[8] = uv>>56;
280 *enclen = 9;
281 }
282 return LP_ENCODING_INT;
283 } else {
284 if (size < 64) *enclen = 1+size;
285 else if (size < 4096) *enclen = 2+size;
286 else *enclen = 5+size;
287 return LP_ENCODING_STRING;
288 }
289 }
290
291 /* Store a reverse-encoded variable length field, representing the length
292 * of the previous element of size 'l', in the target buffer 'buf'.
293 * The function returns the number of bytes used to encode it, from
294 * 1 to 5. If 'buf' is NULL the function just returns the number of bytes
295 * needed in order to encode the backlen. */
lpEncodeBacklen(unsigned char * buf,uint64_t l)296 unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) {
297 if (l <= 127) {
298 if (buf) buf[0] = l;
299 return 1;
300 } else if (l < 16383) {
301 if (buf) {
302 buf[0] = l>>7;
303 buf[1] = (l&127)|128;
304 }
305 return 2;
306 } else if (l < 2097151) {
307 if (buf) {
308 buf[0] = l>>14;
309 buf[1] = ((l>>7)&127)|128;
310 buf[2] = (l&127)|128;
311 }
312 return 3;
313 } else if (l < 268435455) {
314 if (buf) {
315 buf[0] = l>>21;
316 buf[1] = ((l>>14)&127)|128;
317 buf[2] = ((l>>7)&127)|128;
318 buf[3] = (l&127)|128;
319 }
320 return 4;
321 } else {
322 if (buf) {
323 buf[0] = l>>28;
324 buf[1] = ((l>>21)&127)|128;
325 buf[2] = ((l>>14)&127)|128;
326 buf[3] = ((l>>7)&127)|128;
327 buf[4] = (l&127)|128;
328 }
329 return 5;
330 }
331 }
332
333 /* Decode the backlen and returns it. If the encoding looks invalid (more than
334 * 5 bytes are used), UINT64_MAX is returned to report the problem. */
lpDecodeBacklen(unsigned char * p)335 uint64_t lpDecodeBacklen(unsigned char *p) {
336 uint64_t val = 0;
337 uint64_t shift = 0;
338 do {
339 val |= (uint64_t)(p[0] & 127) << shift;
340 if (!(p[0] & 128)) break;
341 shift += 7;
342 p--;
343 if (shift > 28) return UINT64_MAX;
344 } while(1);
345 return val;
346 }
347
348 /* Encode the string element pointed by 's' of size 'len' in the target
349 * buffer 's'. The function should be called with 'buf' having always enough
350 * space for encoding the string. This is done by calling lpEncodeGetType()
351 * before calling this function. */
lpEncodeString(unsigned char * buf,unsigned char * s,uint32_t len)352 void lpEncodeString(unsigned char *buf, unsigned char *s, uint32_t len) {
353 if (len < 64) {
354 buf[0] = len | LP_ENCODING_6BIT_STR;
355 memcpy(buf+1,s,len);
356 } else if (len < 4096) {
357 buf[0] = (len >> 8) | LP_ENCODING_12BIT_STR;
358 buf[1] = len & 0xff;
359 memcpy(buf+2,s,len);
360 } else {
361 buf[0] = LP_ENCODING_32BIT_STR;
362 buf[1] = len & 0xff;
363 buf[2] = (len >> 8) & 0xff;
364 buf[3] = (len >> 16) & 0xff;
365 buf[4] = (len >> 24) & 0xff;
366 memcpy(buf+5,s,len);
367 }
368 }
369
370 /* Return the encoded length of the listpack element pointed by 'p'. If the
371 * element encoding is wrong then 0 is returned. */
lpCurrentEncodedSize(unsigned char * p)372 uint32_t lpCurrentEncodedSize(unsigned char *p) {
373 if (LP_ENCODING_IS_7BIT_UINT(p[0])) return 1;
374 if (LP_ENCODING_IS_6BIT_STR(p[0])) return 1+LP_ENCODING_6BIT_STR_LEN(p);
375 if (LP_ENCODING_IS_13BIT_INT(p[0])) return 2;
376 if (LP_ENCODING_IS_16BIT_INT(p[0])) return 3;
377 if (LP_ENCODING_IS_24BIT_INT(p[0])) return 4;
378 if (LP_ENCODING_IS_32BIT_INT(p[0])) return 5;
379 if (LP_ENCODING_IS_64BIT_INT(p[0])) return 9;
380 if (LP_ENCODING_IS_12BIT_STR(p[0])) return 2+LP_ENCODING_12BIT_STR_LEN(p);
381 if (LP_ENCODING_IS_32BIT_STR(p[0])) return 5+LP_ENCODING_32BIT_STR_LEN(p);
382 if (p[0] == LP_EOF) return 1;
383 return 0;
384 }
385
386 /* Skip the current entry returning the next. It is invalid to call this
387 * function if the current element is the EOF element at the end of the
388 * listpack, however, while this function is used to implement lpNext(),
389 * it does not return NULL when the EOF element is encountered. */
lpSkip(unsigned char * p)390 unsigned char *lpSkip(unsigned char *p) {
391 unsigned long entrylen = lpCurrentEncodedSize(p);
392 entrylen += lpEncodeBacklen(NULL,entrylen);
393 p += entrylen;
394 return p;
395 }
396
397 /* If 'p' points to an element of the listpack, calling lpNext() will return
398 * the pointer to the next element (the one on the right), or NULL if 'p'
399 * already pointed to the last element of the listpack. */
lpNext(unsigned char * lp,unsigned char * p)400 unsigned char *lpNext(unsigned char *lp, unsigned char *p) {
401 ((void) lp); /* lp is not used for now. However lpPrev() uses it. */
402 p = lpSkip(p);
403 if (p[0] == LP_EOF) return NULL;
404 return p;
405 }
406
407 /* If 'p' points to an element of the listpack, calling lpPrev() will return
408 * the pointer to the preivous element (the one on the left), or NULL if 'p'
409 * already pointed to the first element of the listpack. */
lpPrev(unsigned char * lp,unsigned char * p)410 unsigned char *lpPrev(unsigned char *lp, unsigned char *p) {
411 if (p-lp == LP_HDR_SIZE) return NULL;
412 p--; /* Seek the first backlen byte of the last element. */
413 uint64_t prevlen = lpDecodeBacklen(p);
414 prevlen += lpEncodeBacklen(NULL,prevlen);
415 return p-prevlen+1; /* Seek the first byte of the previous entry. */
416 }
417
418 /* Return a pointer to the first element of the listpack, or NULL if the
419 * listpack has no elements. */
lpFirst(unsigned char * lp)420 unsigned char *lpFirst(unsigned char *lp) {
421 lp += LP_HDR_SIZE; /* Skip the header. */
422 if (lp[0] == LP_EOF) return NULL;
423 return lp;
424 }
425
426 /* Return a pointer to the last element of the listpack, or NULL if the
427 * listpack has no elements. */
lpLast(unsigned char * lp)428 unsigned char *lpLast(unsigned char *lp) {
429 unsigned char *p = lp+lpGetTotalBytes(lp)-1; /* Seek EOF element. */
430 return lpPrev(lp,p); /* Will return NULL if EOF is the only element. */
431 }
432
433 /* Return the number of elements inside the listpack. This function attempts
434 * to use the cached value when within range, otherwise a full scan is
435 * needed. As a side effect of calling this function, the listpack header
436 * could be modified, because if the count is found to be already within
437 * the 'numele' header field range, the new value is set. */
lpLength(unsigned char * lp)438 uint32_t lpLength(unsigned char *lp) {
439 uint32_t numele = lpGetNumElements(lp);
440 if (numele != LP_HDR_NUMELE_UNKNOWN) return numele;
441
442 /* Too many elements inside the listpack. We need to scan in order
443 * to get the total number. */
444 uint32_t count = 0;
445 unsigned char *p = lpFirst(lp);
446 while(p) {
447 count++;
448 p = lpNext(lp,p);
449 }
450
451 /* If the count is again within range of the header numele field,
452 * set it. */
453 if (count < LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp,count);
454 return count;
455 }
456
457 /* Return the listpack element pointed by 'p'.
458 *
459 * The function changes behavior depending on the passed 'intbuf' value.
460 * Specifically, if 'intbuf' is NULL:
461 *
462 * If the element is internally encoded as an integer, the function returns
463 * NULL and populates the integer value by reference in 'count'. Otherwise if
464 * the element is encoded as a string a pointer to the string (pointing inside
465 * the listpack itself) is returned, and 'count' is set to the length of the
466 * string.
467 *
468 * If instead 'intbuf' points to a buffer passed by the caller, that must be
469 * at least LP_INTBUF_SIZE bytes, the function always returns the element as
470 * it was a string (returning the pointer to the string and setting the
471 * 'count' argument to the string length by reference). However if the element
472 * is encoded as an integer, the 'intbuf' buffer is used in order to store
473 * the string representation.
474 *
475 * The user should use one or the other form depending on what the value will
476 * be used for. If there is immediate usage for an integer value returned
477 * by the function, than to pass a buffer (and convert it back to a number)
478 * is of course useless.
479 *
480 * If the function is called against a badly encoded ziplist, so that there
481 * is no valid way to parse it, the function returns like if there was an
482 * integer encoded with value 12345678900000000 + <unrecognized byte>, this may
483 * be an hint to understand that something is wrong. To crash in this case is
484 * not sensible because of the different requirements of the application using
485 * this lib.
486 *
487 * Similarly, there is no error returned since the listpack normally can be
488 * assumed to be valid, so that would be a very high API cost. However a function
489 * in order to check the integrity of the listpack at load time is provided,
490 * check lpIsValid(). */
lpGet(unsigned char * p,int64_t * count,unsigned char * intbuf)491 unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) {
492 int64_t val;
493 uint64_t uval, negstart, negmax;
494
495 if (LP_ENCODING_IS_7BIT_UINT(p[0])) {
496 negstart = UINT64_MAX; /* 7 bit ints are always positive. */
497 negmax = 0;
498 uval = p[0] & 0x7f;
499 } else if (LP_ENCODING_IS_6BIT_STR(p[0])) {
500 *count = LP_ENCODING_6BIT_STR_LEN(p);
501 return p+1;
502 } else if (LP_ENCODING_IS_13BIT_INT(p[0])) {
503 uval = ((p[0]&0x1f)<<8) | p[1];
504 negstart = (uint64_t)1<<12;
505 negmax = 8191;
506 } else if (LP_ENCODING_IS_16BIT_INT(p[0])) {
507 uval = (uint64_t)p[1] |
508 (uint64_t)p[2]<<8;
509 negstart = (uint64_t)1<<15;
510 negmax = UINT16_MAX;
511 } else if (LP_ENCODING_IS_24BIT_INT(p[0])) {
512 uval = (uint64_t)p[1] |
513 (uint64_t)p[2]<<8 |
514 (uint64_t)p[3]<<16;
515 negstart = (uint64_t)1<<23;
516 negmax = UINT32_MAX>>8;
517 } else if (LP_ENCODING_IS_32BIT_INT(p[0])) {
518 uval = (uint64_t)p[1] |
519 (uint64_t)p[2]<<8 |
520 (uint64_t)p[3]<<16 |
521 (uint64_t)p[4]<<24;
522 negstart = (uint64_t)1<<31;
523 negmax = UINT32_MAX;
524 } else if (LP_ENCODING_IS_64BIT_INT(p[0])) {
525 uval = (uint64_t)p[1] |
526 (uint64_t)p[2]<<8 |
527 (uint64_t)p[3]<<16 |
528 (uint64_t)p[4]<<24 |
529 (uint64_t)p[5]<<32 |
530 (uint64_t)p[6]<<40 |
531 (uint64_t)p[7]<<48 |
532 (uint64_t)p[8]<<56;
533 negstart = (uint64_t)1<<63;
534 negmax = UINT64_MAX;
535 } else if (LP_ENCODING_IS_12BIT_STR(p[0])) {
536 *count = LP_ENCODING_12BIT_STR_LEN(p);
537 return p+2;
538 } else if (LP_ENCODING_IS_32BIT_STR(p[0])) {
539 *count = LP_ENCODING_32BIT_STR_LEN(p);
540 return p+5;
541 } else {
542 uval = 12345678900000000ULL + p[0];
543 negstart = UINT64_MAX;
544 negmax = 0;
545 }
546
547 /* We reach this code path only for integer encodings.
548 * Convert the unsigned value to the signed one using two's complement
549 * rule. */
550 if (uval >= negstart) {
551 /* This three steps conversion should avoid undefined behaviors
552 * in the unsigned -> signed conversion. */
553 uval = negmax-uval;
554 val = uval;
555 val = -val-1;
556 } else {
557 val = uval;
558 }
559
560 /* Return the string representation of the integer or the value itself
561 * depending on intbuf being NULL or not. */
562 if (intbuf) {
563 *count = snprintf((char*)intbuf,LP_INTBUF_SIZE,"%lld",(long long)val);
564 return intbuf;
565 } else {
566 *count = val;
567 return NULL;
568 }
569 }
570
571 /* Insert, delete or replace the specified element 'ele' of length 'len' at
572 * the specified position 'p', with 'p' being a listpack element pointer
573 * obtained with lpFirst(), lpLast(), lpIndex(), lpNext(), lpPrev() or
574 * lpSeek().
575 *
576 * The element is inserted before, after, or replaces the element pointed
577 * by 'p' depending on the 'where' argument, that can be LP_BEFORE, LP_AFTER
578 * or LP_REPLACE.
579 *
580 * If 'ele' is set to NULL, the function removes the element pointed by 'p'
581 * instead of inserting one.
582 *
583 * Returns NULL on out of memory or when the listpack total length would exceed
584 * the max allowed size of 2^32-1, otherwise the new pointer to the listpack
585 * holding the new element is returned (and the old pointer passed is no longer
586 * considered valid)
587 *
588 * If 'newp' is not NULL, at the end of a successful call '*newp' will be set
589 * to the address of the element just added, so that it will be possible to
590 * continue an interation with lpNext() and lpPrev().
591 *
592 * For deletion operations ('ele' set to NULL) 'newp' is set to the next
593 * element, on the right of the deleted one, or to NULL if the deleted element
594 * was the last one. */
lpInsert(unsigned char * lp,unsigned char * ele,uint32_t size,unsigned char * p,int where,unsigned char ** newp)595 unsigned char *lpInsert(unsigned char *lp, unsigned char *ele, uint32_t size, unsigned char *p, int where, unsigned char **newp) {
596 unsigned char intenc[LP_MAX_INT_ENCODING_LEN];
597 unsigned char backlen[LP_MAX_BACKLEN_SIZE];
598
599 uint64_t enclen; /* The length of the encoded element. */
600
601 /* An element pointer set to NULL means deletion, which is conceptually
602 * replacing the element with a zero-length element. So whatever we
603 * get passed as 'where', set it to LP_REPLACE. */
604 if (ele == NULL) where = LP_REPLACE;
605
606 /* If we need to insert after the current element, we just jump to the
607 * next element (that could be the EOF one) and handle the case of
608 * inserting before. So the function will actually deal with just two
609 * cases: LP_BEFORE and LP_REPLACE. */
610 if (where == LP_AFTER) {
611 p = lpSkip(p);
612 where = LP_BEFORE;
613 }
614
615 /* Store the offset of the element 'p', so that we can obtain its
616 * address again after a reallocation. */
617 unsigned long poff = p-lp;
618
619 /* Calling lpEncodeGetType() results into the encoded version of the
620 * element to be stored into 'intenc' in case it is representable as
621 * an integer: in that case, the function returns LP_ENCODING_INT.
622 * Otherwise if LP_ENCODING_STR is returned, we'll have to call
623 * lpEncodeString() to actually write the encoded string on place later.
624 *
625 * Whatever the returned encoding is, 'enclen' is populated with the
626 * length of the encoded element. */
627 int enctype;
628 if (ele) {
629 enctype = lpEncodeGetType(ele,size,intenc,&enclen);
630 } else {
631 enctype = -1;
632 enclen = 0;
633 }
634
635 /* We need to also encode the backward-parsable length of the element
636 * and append it to the end: this allows to traverse the listpack from
637 * the end to the start. */
638 unsigned long backlen_size = ele ? lpEncodeBacklen(backlen,enclen) : 0;
639 uint64_t old_listpack_bytes = lpGetTotalBytes(lp);
640 uint32_t replaced_len = 0;
641 if (where == LP_REPLACE) {
642 replaced_len = lpCurrentEncodedSize(p);
643 replaced_len += lpEncodeBacklen(NULL,replaced_len);
644 }
645
646 uint64_t new_listpack_bytes = old_listpack_bytes + enclen + backlen_size
647 - replaced_len;
648 if (new_listpack_bytes > UINT32_MAX) return NULL;
649
650 /* We now need to reallocate in order to make space or shrink the
651 * allocation (in case 'when' value is LP_REPLACE and the new element is
652 * smaller). However we do that before memmoving the memory to
653 * make room for the new element if the final allocation will get
654 * larger, or we do it after if the final allocation will get smaller. */
655
656 unsigned char *dst = lp + poff; /* May be updated after reallocation. */
657
658 /* Realloc before: we need more room. */
659 if (new_listpack_bytes > old_listpack_bytes) {
660 if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL;
661 dst = lp + poff;
662 }
663
664 /* Setup the listpack relocating the elements to make the exact room
665 * we need to store the new one. */
666 if (where == LP_BEFORE) {
667 memmove(dst+enclen+backlen_size,dst,old_listpack_bytes-poff);
668 } else { /* LP_REPLACE. */
669 long lendiff = (enclen+backlen_size)-replaced_len;
670 memmove(dst+replaced_len+lendiff,
671 dst+replaced_len,
672 old_listpack_bytes-poff-replaced_len);
673 }
674
675 /* Realloc after: we need to free space. */
676 if (new_listpack_bytes < old_listpack_bytes) {
677 if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL;
678 dst = lp + poff;
679 }
680
681 /* Store the entry. */
682 if (newp) {
683 *newp = dst;
684 /* In case of deletion, set 'newp' to NULL if the next element is
685 * the EOF element. */
686 if (!ele && dst[0] == LP_EOF) *newp = NULL;
687 }
688 if (ele) {
689 if (enctype == LP_ENCODING_INT) {
690 memcpy(dst,intenc,enclen);
691 } else {
692 lpEncodeString(dst,ele,size);
693 }
694 dst += enclen;
695 memcpy(dst,backlen,backlen_size);
696 dst += backlen_size;
697 }
698
699 /* Update header. */
700 if (where != LP_REPLACE || ele == NULL) {
701 uint32_t num_elements = lpGetNumElements(lp);
702 if (num_elements != LP_HDR_NUMELE_UNKNOWN) {
703 if (ele)
704 lpSetNumElements(lp,num_elements+1);
705 else
706 lpSetNumElements(lp,num_elements-1);
707 }
708 }
709 lpSetTotalBytes(lp,new_listpack_bytes);
710
711 #if 0
712 /* This code path is normally disabled: what it does is to force listpack
713 * to return *always* a new pointer after performing some modification to
714 * the listpack, even if the previous allocation was enough. This is useful
715 * in order to spot bugs in code using listpacks: by doing so we can find
716 * if the caller forgets to set the new pointer where the listpack reference
717 * is stored, after an update. */
718 unsigned char *oldlp = lp;
719 lp = lp_malloc(new_listpack_bytes);
720 memcpy(lp,oldlp,new_listpack_bytes);
721 if (newp) {
722 unsigned long offset = (*newp)-oldlp;
723 *newp = lp + offset;
724 }
725 /* Make sure the old allocation contains garbage. */
726 memset(oldlp,'A',new_listpack_bytes);
727 lp_free(oldlp);
728 #endif
729
730 return lp;
731 }
732
733 /* Append the specified element 'ele' of length 'len' at the end of the
734 * listpack. It is implemented in terms of lpInsert(), so the return value is
735 * the same as lpInsert(). */
lpAppend(unsigned char * lp,unsigned char * ele,uint32_t size)736 unsigned char *lpAppend(unsigned char *lp, unsigned char *ele, uint32_t size) {
737 uint64_t listpack_bytes = lpGetTotalBytes(lp);
738 unsigned char *eofptr = lp + listpack_bytes - 1;
739 return lpInsert(lp,ele,size,eofptr,LP_BEFORE,NULL);
740 }
741
742 /* Remove the element pointed by 'p', and return the resulting listpack.
743 * If 'newp' is not NULL, the next element pointer (to the right of the
744 * deleted one) is returned by reference. If the deleted element was the
745 * last one, '*newp' is set to NULL. */
lpDelete(unsigned char * lp,unsigned char * p,unsigned char ** newp)746 unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp) {
747 return lpInsert(lp,NULL,0,p,LP_REPLACE,newp);
748 }
749
750 /* Return the total number of bytes the listpack is composed of. */
lpBytes(unsigned char * lp)751 uint32_t lpBytes(unsigned char *lp) {
752 return lpGetTotalBytes(lp);
753 }
754
755 /* Seek the specified element and returns the pointer to the seeked element.
756 * Positive indexes specify the zero-based element to seek from the head to
757 * the tail, negative indexes specify elements starting from the tail, where
758 * -1 means the last element, -2 the penultimate and so forth. If the index
759 * is out of range, NULL is returned. */
lpSeek(unsigned char * lp,long index)760 unsigned char *lpSeek(unsigned char *lp, long index) {
761 int forward = 1; /* Seek forward by default. */
762
763 /* We want to seek from left to right or the other way around
764 * depending on the listpack length and the element position.
765 * However if the listpack length cannot be obtained in constant time,
766 * we always seek from left to right. */
767 uint32_t numele = lpGetNumElements(lp);
768 if (numele != LP_HDR_NUMELE_UNKNOWN) {
769 if (index < 0) index = (long)numele+index;
770 if (index < 0) return NULL; /* Index still < 0 means out of range. */
771 if (index >= numele) return NULL; /* Out of range the other side. */
772 /* We want to scan right-to-left if the element we are looking for
773 * is past the half of the listpack. */
774 if (index > numele/2) {
775 forward = 0;
776 /* Left to right scanning always expects a negative index. Convert
777 * our index to negative form. */
778 index -= numele;
779 }
780 } else {
781 /* If the listpack length is unspecified, for negative indexes we
782 * want to always scan left-to-right. */
783 if (index < 0) forward = 0;
784 }
785
786 /* Forward and backward scanning is trivially based on lpNext()/lpPrev(). */
787 if (forward) {
788 unsigned char *ele = lpFirst(lp);
789 while (index > 0 && ele) {
790 ele = lpNext(lp,ele);
791 index--;
792 }
793 return ele;
794 } else {
795 unsigned char *ele = lpLast(lp);
796 while (index < -1 && ele) {
797 ele = lpPrev(lp,ele);
798 index++;
799 }
800 return ele;
801 }
802 }
803
804