xref: /f-stack/app/redis-5.0.5/src/listpack.c (revision 572c4311)
1 /* Listpack -- A lists of strings serialization format
2  *
3  * This file implements the specification you can find at:
4  *
5  *  https://github.com/antirez/listpack
6  *
7  * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com>
8  * All rights reserved.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions are met:
12  *
13  *   * Redistributions of source code must retain the above copyright notice,
14  *     this list of conditions and the following disclaimer.
15  *   * Redistributions in binary form must reproduce the above copyright
16  *     notice, this list of conditions and the following disclaimer in the
17  *     documentation and/or other materials provided with the distribution.
18  *   * Neither the name of Redis nor the names of its contributors may be used
19  *     to endorse or promote products derived from this software without
20  *     specific prior written permission.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
23  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
26  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
27  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
28  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
29  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
30  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
31  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
32  * POSSIBILITY OF SUCH DAMAGE.
33  */
34 
35 #include <stdint.h>
36 #include <limits.h>
37 #include <sys/types.h>
38 #include <stdlib.h>
39 #include <string.h>
40 #include <stdio.h>
41 
42 #include "listpack.h"
43 #include "listpack_malloc.h"
44 
45 #define LP_HDR_SIZE 6       /* 32 bit total len + 16 bit number of elements. */
46 #define LP_HDR_NUMELE_UNKNOWN UINT16_MAX
47 #define LP_MAX_INT_ENCODING_LEN 9
48 #define LP_MAX_BACKLEN_SIZE 5
49 #define LP_MAX_ENTRY_BACKLEN 34359738367ULL
50 #define LP_ENCODING_INT 0
51 #define LP_ENCODING_STRING 1
52 
53 #define LP_ENCODING_7BIT_UINT 0
54 #define LP_ENCODING_7BIT_UINT_MASK 0x80
55 #define LP_ENCODING_IS_7BIT_UINT(byte) (((byte)&LP_ENCODING_7BIT_UINT_MASK)==LP_ENCODING_7BIT_UINT)
56 
57 #define LP_ENCODING_6BIT_STR 0x80
58 #define LP_ENCODING_6BIT_STR_MASK 0xC0
59 #define LP_ENCODING_IS_6BIT_STR(byte) (((byte)&LP_ENCODING_6BIT_STR_MASK)==LP_ENCODING_6BIT_STR)
60 
61 #define LP_ENCODING_13BIT_INT 0xC0
62 #define LP_ENCODING_13BIT_INT_MASK 0xE0
63 #define LP_ENCODING_IS_13BIT_INT(byte) (((byte)&LP_ENCODING_13BIT_INT_MASK)==LP_ENCODING_13BIT_INT)
64 
65 #define LP_ENCODING_12BIT_STR 0xE0
66 #define LP_ENCODING_12BIT_STR_MASK 0xF0
67 #define LP_ENCODING_IS_12BIT_STR(byte) (((byte)&LP_ENCODING_12BIT_STR_MASK)==LP_ENCODING_12BIT_STR)
68 
69 #define LP_ENCODING_16BIT_INT 0xF1
70 #define LP_ENCODING_16BIT_INT_MASK 0xFF
71 #define LP_ENCODING_IS_16BIT_INT(byte) (((byte)&LP_ENCODING_16BIT_INT_MASK)==LP_ENCODING_16BIT_INT)
72 
73 #define LP_ENCODING_24BIT_INT 0xF2
74 #define LP_ENCODING_24BIT_INT_MASK 0xFF
75 #define LP_ENCODING_IS_24BIT_INT(byte) (((byte)&LP_ENCODING_24BIT_INT_MASK)==LP_ENCODING_24BIT_INT)
76 
77 #define LP_ENCODING_32BIT_INT 0xF3
78 #define LP_ENCODING_32BIT_INT_MASK 0xFF
79 #define LP_ENCODING_IS_32BIT_INT(byte) (((byte)&LP_ENCODING_32BIT_INT_MASK)==LP_ENCODING_32BIT_INT)
80 
81 #define LP_ENCODING_64BIT_INT 0xF4
82 #define LP_ENCODING_64BIT_INT_MASK 0xFF
83 #define LP_ENCODING_IS_64BIT_INT(byte) (((byte)&LP_ENCODING_64BIT_INT_MASK)==LP_ENCODING_64BIT_INT)
84 
85 #define LP_ENCODING_32BIT_STR 0xF0
86 #define LP_ENCODING_32BIT_STR_MASK 0xFF
87 #define LP_ENCODING_IS_32BIT_STR(byte) (((byte)&LP_ENCODING_32BIT_STR_MASK)==LP_ENCODING_32BIT_STR)
88 
89 #define LP_EOF 0xFF
90 
91 #define LP_ENCODING_6BIT_STR_LEN(p) ((p)[0] & 0x3F)
92 #define LP_ENCODING_12BIT_STR_LEN(p) ((((p)[0] & 0xF) << 8) | (p)[1])
93 #define LP_ENCODING_32BIT_STR_LEN(p) (((uint32_t)(p)[1]<<0) | \
94                                       ((uint32_t)(p)[2]<<8) | \
95                                       ((uint32_t)(p)[3]<<16) | \
96                                       ((uint32_t)(p)[4]<<24))
97 
98 #define lpGetTotalBytes(p)           (((uint32_t)(p)[0]<<0) | \
99                                       ((uint32_t)(p)[1]<<8) | \
100                                       ((uint32_t)(p)[2]<<16) | \
101                                       ((uint32_t)(p)[3]<<24))
102 
103 #define lpGetNumElements(p)          (((uint32_t)(p)[4]<<0) | \
104                                       ((uint32_t)(p)[5]<<8))
105 #define lpSetTotalBytes(p,v) do { \
106     (p)[0] = (v)&0xff; \
107     (p)[1] = ((v)>>8)&0xff; \
108     (p)[2] = ((v)>>16)&0xff; \
109     (p)[3] = ((v)>>24)&0xff; \
110 } while(0)
111 
112 #define lpSetNumElements(p,v) do { \
113     (p)[4] = (v)&0xff; \
114     (p)[5] = ((v)>>8)&0xff; \
115 } while(0)
116 
117 /* Convert a string into a signed 64 bit integer.
118  * The function returns 1 if the string could be parsed into a (non-overflowing)
119  * signed 64 bit int, 0 otherwise. The 'value' will be set to the parsed value
120  * when the function returns success.
121  *
122  * Note that this function demands that the string strictly represents
123  * a int64 value: no spaces or other characters before or after the string
124  * representing the number are accepted, nor zeroes at the start if not
125  * for the string "0" representing the zero number.
126  *
127  * Because of its strictness, it is safe to use this function to check if
128  * you can convert a string into a long long, and obtain back the string
129  * from the number without any loss in the string representation. *
130  *
131  * -----------------------------------------------------------------------------
132  *
133  * Credits: this function was adapted from the Redis source code, file
134  * "utils.c", function string2ll(), and is copyright:
135  *
136  * Copyright(C) 2011, Pieter Noordhuis
137  * Copyright(C) 2011, Salvatore Sanfilippo
138  *
139  * The function is released under the BSD 3-clause license.
140  */
lpStringToInt64(const char * s,unsigned long slen,int64_t * value)141 int lpStringToInt64(const char *s, unsigned long slen, int64_t *value) {
142     const char *p = s;
143     unsigned long plen = 0;
144     int negative = 0;
145     uint64_t v;
146 
147     if (plen == slen)
148         return 0;
149 
150     /* Special case: first and only digit is 0. */
151     if (slen == 1 && p[0] == '0') {
152         if (value != NULL) *value = 0;
153         return 1;
154     }
155 
156     if (p[0] == '-') {
157         negative = 1;
158         p++; plen++;
159 
160         /* Abort on only a negative sign. */
161         if (plen == slen)
162             return 0;
163     }
164 
165     /* First digit should be 1-9, otherwise the string should just be 0. */
166     if (p[0] >= '1' && p[0] <= '9') {
167         v = p[0]-'0';
168         p++; plen++;
169     } else if (p[0] == '0' && slen == 1) {
170         *value = 0;
171         return 1;
172     } else {
173         return 0;
174     }
175 
176     while (plen < slen && p[0] >= '0' && p[0] <= '9') {
177         if (v > (UINT64_MAX / 10)) /* Overflow. */
178             return 0;
179         v *= 10;
180 
181         if (v > (UINT64_MAX - (p[0]-'0'))) /* Overflow. */
182             return 0;
183         v += p[0]-'0';
184 
185         p++; plen++;
186     }
187 
188     /* Return if not all bytes were used. */
189     if (plen < slen)
190         return 0;
191 
192     if (negative) {
193         if (v > ((uint64_t)(-(INT64_MIN+1))+1)) /* Overflow. */
194             return 0;
195         if (value != NULL) *value = -v;
196     } else {
197         if (v > INT64_MAX) /* Overflow. */
198             return 0;
199         if (value != NULL) *value = v;
200     }
201     return 1;
202 }
203 
204 /* Create a new, empty listpack.
205  * On success the new listpack is returned, otherwise an error is returned. */
lpNew(void)206 unsigned char *lpNew(void) {
207     unsigned char *lp = lp_malloc(LP_HDR_SIZE+1);
208     if (lp == NULL) return NULL;
209     lpSetTotalBytes(lp,LP_HDR_SIZE+1);
210     lpSetNumElements(lp,0);
211     lp[LP_HDR_SIZE] = LP_EOF;
212     return lp;
213 }
214 
215 /* Free the specified listpack. */
lpFree(unsigned char * lp)216 void lpFree(unsigned char *lp) {
217     lp_free(lp);
218 }
219 
220 /* Given an element 'ele' of size 'size', determine if the element can be
221  * represented inside the listpack encoded as integer, and returns
222  * LP_ENCODING_INT if so. Otherwise returns LP_ENCODING_STR if no integer
223  * encoding is possible.
224  *
225  * If the LP_ENCODING_INT is returned, the function stores the integer encoded
226  * representation of the element in the 'intenc' buffer.
227  *
228  * Regardless of the returned encoding, 'enclen' is populated by reference to
229  * the number of bytes that the string or integer encoded element will require
230  * in order to be represented. */
lpEncodeGetType(unsigned char * ele,uint32_t size,unsigned char * intenc,uint64_t * enclen)231 int lpEncodeGetType(unsigned char *ele, uint32_t size, unsigned char *intenc, uint64_t *enclen) {
232     int64_t v;
233     if (lpStringToInt64((const char*)ele, size, &v)) {
234         if (v >= 0 && v <= 127) {
235             /* Single byte 0-127 integer. */
236             intenc[0] = v;
237             *enclen = 1;
238         } else if (v >= -4096 && v <= 4095) {
239             /* 13 bit integer. */
240             if (v < 0) v = ((int64_t)1<<13)+v;
241             intenc[0] = (v>>8)|LP_ENCODING_13BIT_INT;
242             intenc[1] = v&0xff;
243             *enclen = 2;
244         } else if (v >= -32768 && v <= 32767) {
245             /* 16 bit integer. */
246             if (v < 0) v = ((int64_t)1<<16)+v;
247             intenc[0] = LP_ENCODING_16BIT_INT;
248             intenc[1] = v&0xff;
249             intenc[2] = v>>8;
250             *enclen = 3;
251         } else if (v >= -8388608 && v <= 8388607) {
252             /* 24 bit integer. */
253             if (v < 0) v = ((int64_t)1<<24)+v;
254             intenc[0] = LP_ENCODING_24BIT_INT;
255             intenc[1] = v&0xff;
256             intenc[2] = (v>>8)&0xff;
257             intenc[3] = v>>16;
258             *enclen = 4;
259         } else if (v >= -2147483648 && v <= 2147483647) {
260             /* 32 bit integer. */
261             if (v < 0) v = ((int64_t)1<<32)+v;
262             intenc[0] = LP_ENCODING_32BIT_INT;
263             intenc[1] = v&0xff;
264             intenc[2] = (v>>8)&0xff;
265             intenc[3] = (v>>16)&0xff;
266             intenc[4] = v>>24;
267             *enclen = 5;
268         } else {
269             /* 64 bit integer. */
270             uint64_t uv = v;
271             intenc[0] = LP_ENCODING_64BIT_INT;
272             intenc[1] = uv&0xff;
273             intenc[2] = (uv>>8)&0xff;
274             intenc[3] = (uv>>16)&0xff;
275             intenc[4] = (uv>>24)&0xff;
276             intenc[5] = (uv>>32)&0xff;
277             intenc[6] = (uv>>40)&0xff;
278             intenc[7] = (uv>>48)&0xff;
279             intenc[8] = uv>>56;
280             *enclen = 9;
281         }
282         return LP_ENCODING_INT;
283     } else {
284         if (size < 64) *enclen = 1+size;
285         else if (size < 4096) *enclen = 2+size;
286         else *enclen = 5+size;
287         return LP_ENCODING_STRING;
288     }
289 }
290 
291 /* Store a reverse-encoded variable length field, representing the length
292  * of the previous element of size 'l', in the target buffer 'buf'.
293  * The function returns the number of bytes used to encode it, from
294  * 1 to 5. If 'buf' is NULL the function just returns the number of bytes
295  * needed in order to encode the backlen. */
lpEncodeBacklen(unsigned char * buf,uint64_t l)296 unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) {
297     if (l <= 127) {
298         if (buf) buf[0] = l;
299         return 1;
300     } else if (l < 16383) {
301         if (buf) {
302             buf[0] = l>>7;
303             buf[1] = (l&127)|128;
304         }
305         return 2;
306     } else if (l < 2097151) {
307         if (buf) {
308             buf[0] = l>>14;
309             buf[1] = ((l>>7)&127)|128;
310             buf[2] = (l&127)|128;
311         }
312         return 3;
313     } else if (l < 268435455) {
314         if (buf) {
315             buf[0] = l>>21;
316             buf[1] = ((l>>14)&127)|128;
317             buf[2] = ((l>>7)&127)|128;
318             buf[3] = (l&127)|128;
319         }
320         return 4;
321     } else {
322         if (buf) {
323             buf[0] = l>>28;
324             buf[1] = ((l>>21)&127)|128;
325             buf[2] = ((l>>14)&127)|128;
326             buf[3] = ((l>>7)&127)|128;
327             buf[4] = (l&127)|128;
328         }
329         return 5;
330     }
331 }
332 
333 /* Decode the backlen and returns it. If the encoding looks invalid (more than
334  * 5 bytes are used), UINT64_MAX is returned to report the problem. */
lpDecodeBacklen(unsigned char * p)335 uint64_t lpDecodeBacklen(unsigned char *p) {
336     uint64_t val = 0;
337     uint64_t shift = 0;
338     do {
339         val |= (uint64_t)(p[0] & 127) << shift;
340         if (!(p[0] & 128)) break;
341         shift += 7;
342         p--;
343         if (shift > 28) return UINT64_MAX;
344     } while(1);
345     return val;
346 }
347 
348 /* Encode the string element pointed by 's' of size 'len' in the target
349  * buffer 's'. The function should be called with 'buf' having always enough
350  * space for encoding the string. This is done by calling lpEncodeGetType()
351  * before calling this function. */
lpEncodeString(unsigned char * buf,unsigned char * s,uint32_t len)352 void lpEncodeString(unsigned char *buf, unsigned char *s, uint32_t len) {
353     if (len < 64) {
354         buf[0] = len | LP_ENCODING_6BIT_STR;
355         memcpy(buf+1,s,len);
356     } else if (len < 4096) {
357         buf[0] = (len >> 8) | LP_ENCODING_12BIT_STR;
358         buf[1] = len & 0xff;
359         memcpy(buf+2,s,len);
360     } else {
361         buf[0] = LP_ENCODING_32BIT_STR;
362         buf[1] = len & 0xff;
363         buf[2] = (len >> 8) & 0xff;
364         buf[3] = (len >> 16) & 0xff;
365         buf[4] = (len >> 24) & 0xff;
366         memcpy(buf+5,s,len);
367     }
368 }
369 
370 /* Return the encoded length of the listpack element pointed by 'p'. If the
371  * element encoding is wrong then 0 is returned. */
lpCurrentEncodedSize(unsigned char * p)372 uint32_t lpCurrentEncodedSize(unsigned char *p) {
373     if (LP_ENCODING_IS_7BIT_UINT(p[0])) return 1;
374     if (LP_ENCODING_IS_6BIT_STR(p[0])) return 1+LP_ENCODING_6BIT_STR_LEN(p);
375     if (LP_ENCODING_IS_13BIT_INT(p[0])) return 2;
376     if (LP_ENCODING_IS_16BIT_INT(p[0])) return 3;
377     if (LP_ENCODING_IS_24BIT_INT(p[0])) return 4;
378     if (LP_ENCODING_IS_32BIT_INT(p[0])) return 5;
379     if (LP_ENCODING_IS_64BIT_INT(p[0])) return 9;
380     if (LP_ENCODING_IS_12BIT_STR(p[0])) return 2+LP_ENCODING_12BIT_STR_LEN(p);
381     if (LP_ENCODING_IS_32BIT_STR(p[0])) return 5+LP_ENCODING_32BIT_STR_LEN(p);
382     if (p[0] == LP_EOF) return 1;
383     return 0;
384 }
385 
386 /* Skip the current entry returning the next. It is invalid to call this
387  * function if the current element is the EOF element at the end of the
388  * listpack, however, while this function is used to implement lpNext(),
389  * it does not return NULL when the EOF element is encountered. */
lpSkip(unsigned char * p)390 unsigned char *lpSkip(unsigned char *p) {
391     unsigned long entrylen = lpCurrentEncodedSize(p);
392     entrylen += lpEncodeBacklen(NULL,entrylen);
393     p += entrylen;
394     return p;
395 }
396 
397 /* If 'p' points to an element of the listpack, calling lpNext() will return
398  * the pointer to the next element (the one on the right), or NULL if 'p'
399  * already pointed to the last element of the listpack. */
lpNext(unsigned char * lp,unsigned char * p)400 unsigned char *lpNext(unsigned char *lp, unsigned char *p) {
401     ((void) lp); /* lp is not used for now. However lpPrev() uses it. */
402     p = lpSkip(p);
403     if (p[0] == LP_EOF) return NULL;
404     return p;
405 }
406 
407 /* If 'p' points to an element of the listpack, calling lpPrev() will return
408  * the pointer to the preivous element (the one on the left), or NULL if 'p'
409  * already pointed to the first element of the listpack. */
lpPrev(unsigned char * lp,unsigned char * p)410 unsigned char *lpPrev(unsigned char *lp, unsigned char *p) {
411     if (p-lp == LP_HDR_SIZE) return NULL;
412     p--; /* Seek the first backlen byte of the last element. */
413     uint64_t prevlen = lpDecodeBacklen(p);
414     prevlen += lpEncodeBacklen(NULL,prevlen);
415     return p-prevlen+1; /* Seek the first byte of the previous entry. */
416 }
417 
418 /* Return a pointer to the first element of the listpack, or NULL if the
419  * listpack has no elements. */
lpFirst(unsigned char * lp)420 unsigned char *lpFirst(unsigned char *lp) {
421     lp += LP_HDR_SIZE; /* Skip the header. */
422     if (lp[0] == LP_EOF) return NULL;
423     return lp;
424 }
425 
426 /* Return a pointer to the last element of the listpack, or NULL if the
427  * listpack has no elements. */
lpLast(unsigned char * lp)428 unsigned char *lpLast(unsigned char *lp) {
429     unsigned char *p = lp+lpGetTotalBytes(lp)-1; /* Seek EOF element. */
430     return lpPrev(lp,p); /* Will return NULL if EOF is the only element. */
431 }
432 
433 /* Return the number of elements inside the listpack. This function attempts
434  * to use the cached value when within range, otherwise a full scan is
435  * needed. As a side effect of calling this function, the listpack header
436  * could be modified, because if the count is found to be already within
437  * the 'numele' header field range, the new value is set. */
lpLength(unsigned char * lp)438 uint32_t lpLength(unsigned char *lp) {
439     uint32_t numele = lpGetNumElements(lp);
440     if (numele != LP_HDR_NUMELE_UNKNOWN) return numele;
441 
442     /* Too many elements inside the listpack. We need to scan in order
443      * to get the total number. */
444     uint32_t count = 0;
445     unsigned char *p = lpFirst(lp);
446     while(p) {
447         count++;
448         p = lpNext(lp,p);
449     }
450 
451     /* If the count is again within range of the header numele field,
452      * set it. */
453     if (count < LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp,count);
454     return count;
455 }
456 
457 /* Return the listpack element pointed by 'p'.
458  *
459  * The function changes behavior depending on the passed 'intbuf' value.
460  * Specifically, if 'intbuf' is NULL:
461  *
462  * If the element is internally encoded as an integer, the function returns
463  * NULL and populates the integer value by reference in 'count'. Otherwise if
464  * the element is encoded as a string a pointer to the string (pointing inside
465  * the listpack itself) is returned, and 'count' is set to the length of the
466  * string.
467  *
468  * If instead 'intbuf' points to a buffer passed by the caller, that must be
469  * at least LP_INTBUF_SIZE bytes, the function always returns the element as
470  * it was a string (returning the pointer to the string and setting the
471  * 'count' argument to the string length by reference). However if the element
472  * is encoded as an integer, the 'intbuf' buffer is used in order to store
473  * the string representation.
474  *
475  * The user should use one or the other form depending on what the value will
476  * be used for. If there is immediate usage for an integer value returned
477  * by the function, than to pass a buffer (and convert it back to a number)
478  * is of course useless.
479  *
480  * If the function is called against a badly encoded ziplist, so that there
481  * is no valid way to parse it, the function returns like if there was an
482  * integer encoded with value 12345678900000000 + <unrecognized byte>, this may
483  * be an hint to understand that something is wrong. To crash in this case is
484  * not sensible because of the different requirements of the application using
485  * this lib.
486  *
487  * Similarly, there is no error returned since the listpack normally can be
488  * assumed to be valid, so that would be a very high API cost. However a function
489  * in order to check the integrity of the listpack at load time is provided,
490  * check lpIsValid(). */
lpGet(unsigned char * p,int64_t * count,unsigned char * intbuf)491 unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) {
492     int64_t val;
493     uint64_t uval, negstart, negmax;
494 
495     if (LP_ENCODING_IS_7BIT_UINT(p[0])) {
496         negstart = UINT64_MAX; /* 7 bit ints are always positive. */
497         negmax = 0;
498         uval = p[0] & 0x7f;
499     } else if (LP_ENCODING_IS_6BIT_STR(p[0])) {
500         *count = LP_ENCODING_6BIT_STR_LEN(p);
501         return p+1;
502     } else if (LP_ENCODING_IS_13BIT_INT(p[0])) {
503         uval = ((p[0]&0x1f)<<8) | p[1];
504         negstart = (uint64_t)1<<12;
505         negmax = 8191;
506     } else if (LP_ENCODING_IS_16BIT_INT(p[0])) {
507         uval = (uint64_t)p[1] |
508                (uint64_t)p[2]<<8;
509         negstart = (uint64_t)1<<15;
510         negmax = UINT16_MAX;
511     } else if (LP_ENCODING_IS_24BIT_INT(p[0])) {
512         uval = (uint64_t)p[1] |
513                (uint64_t)p[2]<<8 |
514                (uint64_t)p[3]<<16;
515         negstart = (uint64_t)1<<23;
516         negmax = UINT32_MAX>>8;
517     } else if (LP_ENCODING_IS_32BIT_INT(p[0])) {
518         uval = (uint64_t)p[1] |
519                (uint64_t)p[2]<<8 |
520                (uint64_t)p[3]<<16 |
521                (uint64_t)p[4]<<24;
522         negstart = (uint64_t)1<<31;
523         negmax = UINT32_MAX;
524     } else if (LP_ENCODING_IS_64BIT_INT(p[0])) {
525         uval = (uint64_t)p[1] |
526                (uint64_t)p[2]<<8 |
527                (uint64_t)p[3]<<16 |
528                (uint64_t)p[4]<<24 |
529                (uint64_t)p[5]<<32 |
530                (uint64_t)p[6]<<40 |
531                (uint64_t)p[7]<<48 |
532                (uint64_t)p[8]<<56;
533         negstart = (uint64_t)1<<63;
534         negmax = UINT64_MAX;
535     } else if (LP_ENCODING_IS_12BIT_STR(p[0])) {
536         *count = LP_ENCODING_12BIT_STR_LEN(p);
537         return p+2;
538     } else if (LP_ENCODING_IS_32BIT_STR(p[0])) {
539         *count = LP_ENCODING_32BIT_STR_LEN(p);
540         return p+5;
541     } else {
542         uval = 12345678900000000ULL + p[0];
543         negstart = UINT64_MAX;
544         negmax = 0;
545     }
546 
547     /* We reach this code path only for integer encodings.
548      * Convert the unsigned value to the signed one using two's complement
549      * rule. */
550     if (uval >= negstart) {
551         /* This three steps conversion should avoid undefined behaviors
552          * in the unsigned -> signed conversion. */
553         uval = negmax-uval;
554         val = uval;
555         val = -val-1;
556     } else {
557         val = uval;
558     }
559 
560     /* Return the string representation of the integer or the value itself
561      * depending on intbuf being NULL or not. */
562     if (intbuf) {
563         *count = snprintf((char*)intbuf,LP_INTBUF_SIZE,"%lld",(long long)val);
564         return intbuf;
565     } else {
566         *count = val;
567         return NULL;
568     }
569 }
570 
571 /* Insert, delete or replace the specified element 'ele' of length 'len' at
572  * the specified position 'p', with 'p' being a listpack element pointer
573  * obtained with lpFirst(), lpLast(), lpIndex(), lpNext(), lpPrev() or
574  * lpSeek().
575  *
576  * The element is inserted before, after, or replaces the element pointed
577  * by 'p' depending on the 'where' argument, that can be LP_BEFORE, LP_AFTER
578  * or LP_REPLACE.
579  *
580  * If 'ele' is set to NULL, the function removes the element pointed by 'p'
581  * instead of inserting one.
582  *
583  * Returns NULL on out of memory or when the listpack total length would exceed
584  * the max allowed size of 2^32-1, otherwise the new pointer to the listpack
585  * holding the new element is returned (and the old pointer passed is no longer
586  * considered valid)
587  *
588  * If 'newp' is not NULL, at the end of a successful call '*newp' will be set
589  * to the address of the element just added, so that it will be possible to
590  * continue an interation with lpNext() and lpPrev().
591  *
592  * For deletion operations ('ele' set to NULL) 'newp' is set to the next
593  * element, on the right of the deleted one, or to NULL if the deleted element
594  * was the last one. */
lpInsert(unsigned char * lp,unsigned char * ele,uint32_t size,unsigned char * p,int where,unsigned char ** newp)595 unsigned char *lpInsert(unsigned char *lp, unsigned char *ele, uint32_t size, unsigned char *p, int where, unsigned char **newp) {
596     unsigned char intenc[LP_MAX_INT_ENCODING_LEN];
597     unsigned char backlen[LP_MAX_BACKLEN_SIZE];
598 
599     uint64_t enclen; /* The length of the encoded element. */
600 
601     /* An element pointer set to NULL means deletion, which is conceptually
602      * replacing the element with a zero-length element. So whatever we
603      * get passed as 'where', set it to LP_REPLACE. */
604     if (ele == NULL) where = LP_REPLACE;
605 
606     /* If we need to insert after the current element, we just jump to the
607      * next element (that could be the EOF one) and handle the case of
608      * inserting before. So the function will actually deal with just two
609      * cases: LP_BEFORE and LP_REPLACE. */
610     if (where == LP_AFTER) {
611         p = lpSkip(p);
612         where = LP_BEFORE;
613     }
614 
615     /* Store the offset of the element 'p', so that we can obtain its
616      * address again after a reallocation. */
617     unsigned long poff = p-lp;
618 
619     /* Calling lpEncodeGetType() results into the encoded version of the
620      * element to be stored into 'intenc' in case it is representable as
621      * an integer: in that case, the function returns LP_ENCODING_INT.
622      * Otherwise if LP_ENCODING_STR is returned, we'll have to call
623      * lpEncodeString() to actually write the encoded string on place later.
624      *
625      * Whatever the returned encoding is, 'enclen' is populated with the
626      * length of the encoded element. */
627     int enctype;
628     if (ele) {
629         enctype = lpEncodeGetType(ele,size,intenc,&enclen);
630     } else {
631         enctype = -1;
632         enclen = 0;
633     }
634 
635     /* We need to also encode the backward-parsable length of the element
636      * and append it to the end: this allows to traverse the listpack from
637      * the end to the start. */
638     unsigned long backlen_size = ele ? lpEncodeBacklen(backlen,enclen) : 0;
639     uint64_t old_listpack_bytes = lpGetTotalBytes(lp);
640     uint32_t replaced_len  = 0;
641     if (where == LP_REPLACE) {
642         replaced_len = lpCurrentEncodedSize(p);
643         replaced_len += lpEncodeBacklen(NULL,replaced_len);
644     }
645 
646     uint64_t new_listpack_bytes = old_listpack_bytes + enclen + backlen_size
647                                   - replaced_len;
648     if (new_listpack_bytes > UINT32_MAX) return NULL;
649 
650     /* We now need to reallocate in order to make space or shrink the
651      * allocation (in case 'when' value is LP_REPLACE and the new element is
652      * smaller). However we do that before memmoving the memory to
653      * make room for the new element if the final allocation will get
654      * larger, or we do it after if the final allocation will get smaller. */
655 
656     unsigned char *dst = lp + poff; /* May be updated after reallocation. */
657 
658     /* Realloc before: we need more room. */
659     if (new_listpack_bytes > old_listpack_bytes) {
660         if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL;
661         dst = lp + poff;
662     }
663 
664     /* Setup the listpack relocating the elements to make the exact room
665      * we need to store the new one. */
666     if (where == LP_BEFORE) {
667         memmove(dst+enclen+backlen_size,dst,old_listpack_bytes-poff);
668     } else { /* LP_REPLACE. */
669         long lendiff = (enclen+backlen_size)-replaced_len;
670         memmove(dst+replaced_len+lendiff,
671                 dst+replaced_len,
672                 old_listpack_bytes-poff-replaced_len);
673     }
674 
675     /* Realloc after: we need to free space. */
676     if (new_listpack_bytes < old_listpack_bytes) {
677         if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL;
678         dst = lp + poff;
679     }
680 
681     /* Store the entry. */
682     if (newp) {
683         *newp = dst;
684         /* In case of deletion, set 'newp' to NULL if the next element is
685          * the EOF element. */
686         if (!ele && dst[0] == LP_EOF) *newp = NULL;
687     }
688     if (ele) {
689         if (enctype == LP_ENCODING_INT) {
690             memcpy(dst,intenc,enclen);
691         } else {
692             lpEncodeString(dst,ele,size);
693         }
694         dst += enclen;
695         memcpy(dst,backlen,backlen_size);
696         dst += backlen_size;
697     }
698 
699     /* Update header. */
700     if (where != LP_REPLACE || ele == NULL) {
701         uint32_t num_elements = lpGetNumElements(lp);
702         if (num_elements != LP_HDR_NUMELE_UNKNOWN) {
703             if (ele)
704                 lpSetNumElements(lp,num_elements+1);
705             else
706                 lpSetNumElements(lp,num_elements-1);
707         }
708     }
709     lpSetTotalBytes(lp,new_listpack_bytes);
710 
711 #if 0
712     /* This code path is normally disabled: what it does is to force listpack
713      * to return *always* a new pointer after performing some modification to
714      * the listpack, even if the previous allocation was enough. This is useful
715      * in order to spot bugs in code using listpacks: by doing so we can find
716      * if the caller forgets to set the new pointer where the listpack reference
717      * is stored, after an update. */
718     unsigned char *oldlp = lp;
719     lp = lp_malloc(new_listpack_bytes);
720     memcpy(lp,oldlp,new_listpack_bytes);
721     if (newp) {
722         unsigned long offset = (*newp)-oldlp;
723         *newp = lp + offset;
724     }
725     /* Make sure the old allocation contains garbage. */
726     memset(oldlp,'A',new_listpack_bytes);
727     lp_free(oldlp);
728 #endif
729 
730     return lp;
731 }
732 
733 /* Append the specified element 'ele' of length 'len' at the end of the
734  * listpack. It is implemented in terms of lpInsert(), so the return value is
735  * the same as lpInsert(). */
lpAppend(unsigned char * lp,unsigned char * ele,uint32_t size)736 unsigned char *lpAppend(unsigned char *lp, unsigned char *ele, uint32_t size) {
737     uint64_t listpack_bytes = lpGetTotalBytes(lp);
738     unsigned char *eofptr = lp + listpack_bytes - 1;
739     return lpInsert(lp,ele,size,eofptr,LP_BEFORE,NULL);
740 }
741 
742 /* Remove the element pointed by 'p', and return the resulting listpack.
743  * If 'newp' is not NULL, the next element pointer (to the right of the
744  * deleted one) is returned by reference. If the deleted element was the
745  * last one, '*newp' is set to NULL. */
lpDelete(unsigned char * lp,unsigned char * p,unsigned char ** newp)746 unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp) {
747     return lpInsert(lp,NULL,0,p,LP_REPLACE,newp);
748 }
749 
750 /* Return the total number of bytes the listpack is composed of. */
lpBytes(unsigned char * lp)751 uint32_t lpBytes(unsigned char *lp) {
752     return lpGetTotalBytes(lp);
753 }
754 
755 /* Seek the specified element and returns the pointer to the seeked element.
756  * Positive indexes specify the zero-based element to seek from the head to
757  * the tail, negative indexes specify elements starting from the tail, where
758  * -1 means the last element, -2 the penultimate and so forth. If the index
759  * is out of range, NULL is returned. */
lpSeek(unsigned char * lp,long index)760 unsigned char *lpSeek(unsigned char *lp, long index) {
761     int forward = 1; /* Seek forward by default. */
762 
763     /* We want to seek from left to right or the other way around
764      * depending on the listpack length and the element position.
765      * However if the listpack length cannot be obtained in constant time,
766      * we always seek from left to right. */
767     uint32_t numele = lpGetNumElements(lp);
768     if (numele != LP_HDR_NUMELE_UNKNOWN) {
769         if (index < 0) index = (long)numele+index;
770         if (index < 0) return NULL; /* Index still < 0 means out of range. */
771         if (index >= numele) return NULL; /* Out of range the other side. */
772         /* We want to scan right-to-left if the element we are looking for
773          * is past the half of the listpack. */
774         if (index > numele/2) {
775             forward = 0;
776             /* Left to right scanning always expects a negative index. Convert
777              * our index to negative form. */
778             index -= numele;
779         }
780     } else {
781         /* If the listpack length is unspecified, for negative indexes we
782          * want to always scan left-to-right. */
783         if (index < 0) forward = 0;
784     }
785 
786     /* Forward and backward scanning is trivially based on lpNext()/lpPrev(). */
787     if (forward) {
788         unsigned char *ele = lpFirst(lp);
789         while (index > 0 && ele) {
790             ele = lpNext(lp,ele);
791             index--;
792         }
793         return ele;
794     } else {
795         unsigned char *ele = lpLast(lp);
796         while (index < -1 && ele) {
797             ele = lpPrev(lp,ele);
798             index++;
799         }
800         return ele;
801     }
802 }
803 
804