1 /* Listpack -- A lists of strings serialization format 2 * 3 * This file implements the specification you can find at: 4 * 5 * https://github.com/antirez/listpack 6 * 7 * Copyright (c) 2017, Salvatore Sanfilippo <antirez at gmail dot com> 8 * All rights reserved. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions are met: 12 * 13 * * Redistributions of source code must retain the above copyright notice, 14 * this list of conditions and the following disclaimer. 15 * * Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * * Neither the name of Redis nor the names of its contributors may be used 19 * to endorse or promote products derived from this software without 20 * specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 23 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 26 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 27 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 28 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 29 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 30 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 31 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 32 * POSSIBILITY OF SUCH DAMAGE. 33 */ 34 35 #include <stdint.h> 36 #include <limits.h> 37 #include <sys/types.h> 38 #include <stdlib.h> 39 #include <string.h> 40 #include <stdio.h> 41 42 #include "listpack.h" 43 #include "listpack_malloc.h" 44 45 #define LP_HDR_SIZE 6 /* 32 bit total len + 16 bit number of elements. */ 46 #define LP_HDR_NUMELE_UNKNOWN UINT16_MAX 47 #define LP_MAX_INT_ENCODING_LEN 9 48 #define LP_MAX_BACKLEN_SIZE 5 49 #define LP_MAX_ENTRY_BACKLEN 34359738367ULL 50 #define LP_ENCODING_INT 0 51 #define LP_ENCODING_STRING 1 52 53 #define LP_ENCODING_7BIT_UINT 0 54 #define LP_ENCODING_7BIT_UINT_MASK 0x80 55 #define LP_ENCODING_IS_7BIT_UINT(byte) (((byte)&LP_ENCODING_7BIT_UINT_MASK)==LP_ENCODING_7BIT_UINT) 56 57 #define LP_ENCODING_6BIT_STR 0x80 58 #define LP_ENCODING_6BIT_STR_MASK 0xC0 59 #define LP_ENCODING_IS_6BIT_STR(byte) (((byte)&LP_ENCODING_6BIT_STR_MASK)==LP_ENCODING_6BIT_STR) 60 61 #define LP_ENCODING_13BIT_INT 0xC0 62 #define LP_ENCODING_13BIT_INT_MASK 0xE0 63 #define LP_ENCODING_IS_13BIT_INT(byte) (((byte)&LP_ENCODING_13BIT_INT_MASK)==LP_ENCODING_13BIT_INT) 64 65 #define LP_ENCODING_12BIT_STR 0xE0 66 #define LP_ENCODING_12BIT_STR_MASK 0xF0 67 #define LP_ENCODING_IS_12BIT_STR(byte) (((byte)&LP_ENCODING_12BIT_STR_MASK)==LP_ENCODING_12BIT_STR) 68 69 #define LP_ENCODING_16BIT_INT 0xF1 70 #define LP_ENCODING_16BIT_INT_MASK 0xFF 71 #define LP_ENCODING_IS_16BIT_INT(byte) (((byte)&LP_ENCODING_16BIT_INT_MASK)==LP_ENCODING_16BIT_INT) 72 73 #define LP_ENCODING_24BIT_INT 0xF2 74 #define LP_ENCODING_24BIT_INT_MASK 0xFF 75 #define LP_ENCODING_IS_24BIT_INT(byte) (((byte)&LP_ENCODING_24BIT_INT_MASK)==LP_ENCODING_24BIT_INT) 76 77 #define LP_ENCODING_32BIT_INT 0xF3 78 #define LP_ENCODING_32BIT_INT_MASK 0xFF 79 #define LP_ENCODING_IS_32BIT_INT(byte) (((byte)&LP_ENCODING_32BIT_INT_MASK)==LP_ENCODING_32BIT_INT) 80 81 #define LP_ENCODING_64BIT_INT 0xF4 82 #define LP_ENCODING_64BIT_INT_MASK 0xFF 83 #define LP_ENCODING_IS_64BIT_INT(byte) (((byte)&LP_ENCODING_64BIT_INT_MASK)==LP_ENCODING_64BIT_INT) 84 85 #define LP_ENCODING_32BIT_STR 0xF0 86 #define LP_ENCODING_32BIT_STR_MASK 0xFF 87 #define LP_ENCODING_IS_32BIT_STR(byte) (((byte)&LP_ENCODING_32BIT_STR_MASK)==LP_ENCODING_32BIT_STR) 88 89 #define LP_EOF 0xFF 90 91 #define LP_ENCODING_6BIT_STR_LEN(p) ((p)[0] & 0x3F) 92 #define LP_ENCODING_12BIT_STR_LEN(p) ((((p)[0] & 0xF) << 8) | (p)[1]) 93 #define LP_ENCODING_32BIT_STR_LEN(p) (((uint32_t)(p)[1]<<0) | \ 94 ((uint32_t)(p)[2]<<8) | \ 95 ((uint32_t)(p)[3]<<16) | \ 96 ((uint32_t)(p)[4]<<24)) 97 98 #define lpGetTotalBytes(p) (((uint32_t)(p)[0]<<0) | \ 99 ((uint32_t)(p)[1]<<8) | \ 100 ((uint32_t)(p)[2]<<16) | \ 101 ((uint32_t)(p)[3]<<24)) 102 103 #define lpGetNumElements(p) (((uint32_t)(p)[4]<<0) | \ 104 ((uint32_t)(p)[5]<<8)) 105 #define lpSetTotalBytes(p,v) do { \ 106 (p)[0] = (v)&0xff; \ 107 (p)[1] = ((v)>>8)&0xff; \ 108 (p)[2] = ((v)>>16)&0xff; \ 109 (p)[3] = ((v)>>24)&0xff; \ 110 } while(0) 111 112 #define lpSetNumElements(p,v) do { \ 113 (p)[4] = (v)&0xff; \ 114 (p)[5] = ((v)>>8)&0xff; \ 115 } while(0) 116 117 /* Convert a string into a signed 64 bit integer. 118 * The function returns 1 if the string could be parsed into a (non-overflowing) 119 * signed 64 bit int, 0 otherwise. The 'value' will be set to the parsed value 120 * when the function returns success. 121 * 122 * Note that this function demands that the string strictly represents 123 * a int64 value: no spaces or other characters before or after the string 124 * representing the number are accepted, nor zeroes at the start if not 125 * for the string "0" representing the zero number. 126 * 127 * Because of its strictness, it is safe to use this function to check if 128 * you can convert a string into a long long, and obtain back the string 129 * from the number without any loss in the string representation. * 130 * 131 * ----------------------------------------------------------------------------- 132 * 133 * Credits: this function was adapted from the Redis source code, file 134 * "utils.c", function string2ll(), and is copyright: 135 * 136 * Copyright(C) 2011, Pieter Noordhuis 137 * Copyright(C) 2011, Salvatore Sanfilippo 138 * 139 * The function is released under the BSD 3-clause license. 140 */ 141 int lpStringToInt64(const char *s, unsigned long slen, int64_t *value) { 142 const char *p = s; 143 unsigned long plen = 0; 144 int negative = 0; 145 uint64_t v; 146 147 if (plen == slen) 148 return 0; 149 150 /* Special case: first and only digit is 0. */ 151 if (slen == 1 && p[0] == '0') { 152 if (value != NULL) *value = 0; 153 return 1; 154 } 155 156 if (p[0] == '-') { 157 negative = 1; 158 p++; plen++; 159 160 /* Abort on only a negative sign. */ 161 if (plen == slen) 162 return 0; 163 } 164 165 /* First digit should be 1-9, otherwise the string should just be 0. */ 166 if (p[0] >= '1' && p[0] <= '9') { 167 v = p[0]-'0'; 168 p++; plen++; 169 } else if (p[0] == '0' && slen == 1) { 170 *value = 0; 171 return 1; 172 } else { 173 return 0; 174 } 175 176 while (plen < slen && p[0] >= '0' && p[0] <= '9') { 177 if (v > (UINT64_MAX / 10)) /* Overflow. */ 178 return 0; 179 v *= 10; 180 181 if (v > (UINT64_MAX - (p[0]-'0'))) /* Overflow. */ 182 return 0; 183 v += p[0]-'0'; 184 185 p++; plen++; 186 } 187 188 /* Return if not all bytes were used. */ 189 if (plen < slen) 190 return 0; 191 192 if (negative) { 193 if (v > ((uint64_t)(-(INT64_MIN+1))+1)) /* Overflow. */ 194 return 0; 195 if (value != NULL) *value = -v; 196 } else { 197 if (v > INT64_MAX) /* Overflow. */ 198 return 0; 199 if (value != NULL) *value = v; 200 } 201 return 1; 202 } 203 204 /* Create a new, empty listpack. 205 * On success the new listpack is returned, otherwise an error is returned. */ 206 unsigned char *lpNew(void) { 207 unsigned char *lp = lp_malloc(LP_HDR_SIZE+1); 208 if (lp == NULL) return NULL; 209 lpSetTotalBytes(lp,LP_HDR_SIZE+1); 210 lpSetNumElements(lp,0); 211 lp[LP_HDR_SIZE] = LP_EOF; 212 return lp; 213 } 214 215 /* Free the specified listpack. */ 216 void lpFree(unsigned char *lp) { 217 lp_free(lp); 218 } 219 220 /* Given an element 'ele' of size 'size', determine if the element can be 221 * represented inside the listpack encoded as integer, and returns 222 * LP_ENCODING_INT if so. Otherwise returns LP_ENCODING_STR if no integer 223 * encoding is possible. 224 * 225 * If the LP_ENCODING_INT is returned, the function stores the integer encoded 226 * representation of the element in the 'intenc' buffer. 227 * 228 * Regardless of the returned encoding, 'enclen' is populated by reference to 229 * the number of bytes that the string or integer encoded element will require 230 * in order to be represented. */ 231 int lpEncodeGetType(unsigned char *ele, uint32_t size, unsigned char *intenc, uint64_t *enclen) { 232 int64_t v; 233 if (lpStringToInt64((const char*)ele, size, &v)) { 234 if (v >= 0 && v <= 127) { 235 /* Single byte 0-127 integer. */ 236 intenc[0] = v; 237 *enclen = 1; 238 } else if (v >= -4096 && v <= 4095) { 239 /* 13 bit integer. */ 240 if (v < 0) v = ((int64_t)1<<13)+v; 241 intenc[0] = (v>>8)|LP_ENCODING_13BIT_INT; 242 intenc[1] = v&0xff; 243 *enclen = 2; 244 } else if (v >= -32768 && v <= 32767) { 245 /* 16 bit integer. */ 246 if (v < 0) v = ((int64_t)1<<16)+v; 247 intenc[0] = LP_ENCODING_16BIT_INT; 248 intenc[1] = v&0xff; 249 intenc[2] = v>>8; 250 *enclen = 3; 251 } else if (v >= -8388608 && v <= 8388607) { 252 /* 24 bit integer. */ 253 if (v < 0) v = ((int64_t)1<<24)+v; 254 intenc[0] = LP_ENCODING_24BIT_INT; 255 intenc[1] = v&0xff; 256 intenc[2] = (v>>8)&0xff; 257 intenc[3] = v>>16; 258 *enclen = 4; 259 } else if (v >= -2147483648 && v <= 2147483647) { 260 /* 32 bit integer. */ 261 if (v < 0) v = ((int64_t)1<<32)+v; 262 intenc[0] = LP_ENCODING_32BIT_INT; 263 intenc[1] = v&0xff; 264 intenc[2] = (v>>8)&0xff; 265 intenc[3] = (v>>16)&0xff; 266 intenc[4] = v>>24; 267 *enclen = 5; 268 } else { 269 /* 64 bit integer. */ 270 uint64_t uv = v; 271 intenc[0] = LP_ENCODING_64BIT_INT; 272 intenc[1] = uv&0xff; 273 intenc[2] = (uv>>8)&0xff; 274 intenc[3] = (uv>>16)&0xff; 275 intenc[4] = (uv>>24)&0xff; 276 intenc[5] = (uv>>32)&0xff; 277 intenc[6] = (uv>>40)&0xff; 278 intenc[7] = (uv>>48)&0xff; 279 intenc[8] = uv>>56; 280 *enclen = 9; 281 } 282 return LP_ENCODING_INT; 283 } else { 284 if (size < 64) *enclen = 1+size; 285 else if (size < 4096) *enclen = 2+size; 286 else *enclen = 5+size; 287 return LP_ENCODING_STRING; 288 } 289 } 290 291 /* Store a reverse-encoded variable length field, representing the length 292 * of the previous element of size 'l', in the target buffer 'buf'. 293 * The function returns the number of bytes used to encode it, from 294 * 1 to 5. If 'buf' is NULL the function just returns the number of bytes 295 * needed in order to encode the backlen. */ 296 unsigned long lpEncodeBacklen(unsigned char *buf, uint64_t l) { 297 if (l <= 127) { 298 if (buf) buf[0] = l; 299 return 1; 300 } else if (l < 16383) { 301 if (buf) { 302 buf[0] = l>>7; 303 buf[1] = (l&127)|128; 304 } 305 return 2; 306 } else if (l < 2097151) { 307 if (buf) { 308 buf[0] = l>>14; 309 buf[1] = ((l>>7)&127)|128; 310 buf[2] = (l&127)|128; 311 } 312 return 3; 313 } else if (l < 268435455) { 314 if (buf) { 315 buf[0] = l>>21; 316 buf[1] = ((l>>14)&127)|128; 317 buf[2] = ((l>>7)&127)|128; 318 buf[3] = (l&127)|128; 319 } 320 return 4; 321 } else { 322 if (buf) { 323 buf[0] = l>>28; 324 buf[1] = ((l>>21)&127)|128; 325 buf[2] = ((l>>14)&127)|128; 326 buf[3] = ((l>>7)&127)|128; 327 buf[4] = (l&127)|128; 328 } 329 return 5; 330 } 331 } 332 333 /* Decode the backlen and returns it. If the encoding looks invalid (more than 334 * 5 bytes are used), UINT64_MAX is returned to report the problem. */ 335 uint64_t lpDecodeBacklen(unsigned char *p) { 336 uint64_t val = 0; 337 uint64_t shift = 0; 338 do { 339 val |= (uint64_t)(p[0] & 127) << shift; 340 if (!(p[0] & 128)) break; 341 shift += 7; 342 p--; 343 if (shift > 28) return UINT64_MAX; 344 } while(1); 345 return val; 346 } 347 348 /* Encode the string element pointed by 's' of size 'len' in the target 349 * buffer 's'. The function should be called with 'buf' having always enough 350 * space for encoding the string. This is done by calling lpEncodeGetType() 351 * before calling this function. */ 352 void lpEncodeString(unsigned char *buf, unsigned char *s, uint32_t len) { 353 if (len < 64) { 354 buf[0] = len | LP_ENCODING_6BIT_STR; 355 memcpy(buf+1,s,len); 356 } else if (len < 4096) { 357 buf[0] = (len >> 8) | LP_ENCODING_12BIT_STR; 358 buf[1] = len & 0xff; 359 memcpy(buf+2,s,len); 360 } else { 361 buf[0] = LP_ENCODING_32BIT_STR; 362 buf[1] = len & 0xff; 363 buf[2] = (len >> 8) & 0xff; 364 buf[3] = (len >> 16) & 0xff; 365 buf[4] = (len >> 24) & 0xff; 366 memcpy(buf+5,s,len); 367 } 368 } 369 370 /* Return the encoded length of the listpack element pointed by 'p'. If the 371 * element encoding is wrong then 0 is returned. */ 372 uint32_t lpCurrentEncodedSize(unsigned char *p) { 373 if (LP_ENCODING_IS_7BIT_UINT(p[0])) return 1; 374 if (LP_ENCODING_IS_6BIT_STR(p[0])) return 1+LP_ENCODING_6BIT_STR_LEN(p); 375 if (LP_ENCODING_IS_13BIT_INT(p[0])) return 2; 376 if (LP_ENCODING_IS_16BIT_INT(p[0])) return 3; 377 if (LP_ENCODING_IS_24BIT_INT(p[0])) return 4; 378 if (LP_ENCODING_IS_32BIT_INT(p[0])) return 5; 379 if (LP_ENCODING_IS_64BIT_INT(p[0])) return 9; 380 if (LP_ENCODING_IS_12BIT_STR(p[0])) return 2+LP_ENCODING_12BIT_STR_LEN(p); 381 if (LP_ENCODING_IS_32BIT_STR(p[0])) return 5+LP_ENCODING_32BIT_STR_LEN(p); 382 if (p[0] == LP_EOF) return 1; 383 return 0; 384 } 385 386 /* Skip the current entry returning the next. It is invalid to call this 387 * function if the current element is the EOF element at the end of the 388 * listpack, however, while this function is used to implement lpNext(), 389 * it does not return NULL when the EOF element is encountered. */ 390 unsigned char *lpSkip(unsigned char *p) { 391 unsigned long entrylen = lpCurrentEncodedSize(p); 392 entrylen += lpEncodeBacklen(NULL,entrylen); 393 p += entrylen; 394 return p; 395 } 396 397 /* If 'p' points to an element of the listpack, calling lpNext() will return 398 * the pointer to the next element (the one on the right), or NULL if 'p' 399 * already pointed to the last element of the listpack. */ 400 unsigned char *lpNext(unsigned char *lp, unsigned char *p) { 401 ((void) lp); /* lp is not used for now. However lpPrev() uses it. */ 402 p = lpSkip(p); 403 if (p[0] == LP_EOF) return NULL; 404 return p; 405 } 406 407 /* If 'p' points to an element of the listpack, calling lpPrev() will return 408 * the pointer to the preivous element (the one on the left), or NULL if 'p' 409 * already pointed to the first element of the listpack. */ 410 unsigned char *lpPrev(unsigned char *lp, unsigned char *p) { 411 if (p-lp == LP_HDR_SIZE) return NULL; 412 p--; /* Seek the first backlen byte of the last element. */ 413 uint64_t prevlen = lpDecodeBacklen(p); 414 prevlen += lpEncodeBacklen(NULL,prevlen); 415 return p-prevlen+1; /* Seek the first byte of the previous entry. */ 416 } 417 418 /* Return a pointer to the first element of the listpack, or NULL if the 419 * listpack has no elements. */ 420 unsigned char *lpFirst(unsigned char *lp) { 421 lp += LP_HDR_SIZE; /* Skip the header. */ 422 if (lp[0] == LP_EOF) return NULL; 423 return lp; 424 } 425 426 /* Return a pointer to the last element of the listpack, or NULL if the 427 * listpack has no elements. */ 428 unsigned char *lpLast(unsigned char *lp) { 429 unsigned char *p = lp+lpGetTotalBytes(lp)-1; /* Seek EOF element. */ 430 return lpPrev(lp,p); /* Will return NULL if EOF is the only element. */ 431 } 432 433 /* Return the number of elements inside the listpack. This function attempts 434 * to use the cached value when within range, otherwise a full scan is 435 * needed. As a side effect of calling this function, the listpack header 436 * could be modified, because if the count is found to be already within 437 * the 'numele' header field range, the new value is set. */ 438 uint32_t lpLength(unsigned char *lp) { 439 uint32_t numele = lpGetNumElements(lp); 440 if (numele != LP_HDR_NUMELE_UNKNOWN) return numele; 441 442 /* Too many elements inside the listpack. We need to scan in order 443 * to get the total number. */ 444 uint32_t count = 0; 445 unsigned char *p = lpFirst(lp); 446 while(p) { 447 count++; 448 p = lpNext(lp,p); 449 } 450 451 /* If the count is again within range of the header numele field, 452 * set it. */ 453 if (count < LP_HDR_NUMELE_UNKNOWN) lpSetNumElements(lp,count); 454 return count; 455 } 456 457 /* Return the listpack element pointed by 'p'. 458 * 459 * The function changes behavior depending on the passed 'intbuf' value. 460 * Specifically, if 'intbuf' is NULL: 461 * 462 * If the element is internally encoded as an integer, the function returns 463 * NULL and populates the integer value by reference in 'count'. Otherwise if 464 * the element is encoded as a string a pointer to the string (pointing inside 465 * the listpack itself) is returned, and 'count' is set to the length of the 466 * string. 467 * 468 * If instead 'intbuf' points to a buffer passed by the caller, that must be 469 * at least LP_INTBUF_SIZE bytes, the function always returns the element as 470 * it was a string (returning the pointer to the string and setting the 471 * 'count' argument to the string length by reference). However if the element 472 * is encoded as an integer, the 'intbuf' buffer is used in order to store 473 * the string representation. 474 * 475 * The user should use one or the other form depending on what the value will 476 * be used for. If there is immediate usage for an integer value returned 477 * by the function, than to pass a buffer (and convert it back to a number) 478 * is of course useless. 479 * 480 * If the function is called against a badly encoded ziplist, so that there 481 * is no valid way to parse it, the function returns like if there was an 482 * integer encoded with value 12345678900000000 + <unrecognized byte>, this may 483 * be an hint to understand that something is wrong. To crash in this case is 484 * not sensible because of the different requirements of the application using 485 * this lib. 486 * 487 * Similarly, there is no error returned since the listpack normally can be 488 * assumed to be valid, so that would be a very high API cost. However a function 489 * in order to check the integrity of the listpack at load time is provided, 490 * check lpIsValid(). */ 491 unsigned char *lpGet(unsigned char *p, int64_t *count, unsigned char *intbuf) { 492 int64_t val; 493 uint64_t uval, negstart, negmax; 494 495 if (LP_ENCODING_IS_7BIT_UINT(p[0])) { 496 negstart = UINT64_MAX; /* 7 bit ints are always positive. */ 497 negmax = 0; 498 uval = p[0] & 0x7f; 499 } else if (LP_ENCODING_IS_6BIT_STR(p[0])) { 500 *count = LP_ENCODING_6BIT_STR_LEN(p); 501 return p+1; 502 } else if (LP_ENCODING_IS_13BIT_INT(p[0])) { 503 uval = ((p[0]&0x1f)<<8) | p[1]; 504 negstart = (uint64_t)1<<12; 505 negmax = 8191; 506 } else if (LP_ENCODING_IS_16BIT_INT(p[0])) { 507 uval = (uint64_t)p[1] | 508 (uint64_t)p[2]<<8; 509 negstart = (uint64_t)1<<15; 510 negmax = UINT16_MAX; 511 } else if (LP_ENCODING_IS_24BIT_INT(p[0])) { 512 uval = (uint64_t)p[1] | 513 (uint64_t)p[2]<<8 | 514 (uint64_t)p[3]<<16; 515 negstart = (uint64_t)1<<23; 516 negmax = UINT32_MAX>>8; 517 } else if (LP_ENCODING_IS_32BIT_INT(p[0])) { 518 uval = (uint64_t)p[1] | 519 (uint64_t)p[2]<<8 | 520 (uint64_t)p[3]<<16 | 521 (uint64_t)p[4]<<24; 522 negstart = (uint64_t)1<<31; 523 negmax = UINT32_MAX; 524 } else if (LP_ENCODING_IS_64BIT_INT(p[0])) { 525 uval = (uint64_t)p[1] | 526 (uint64_t)p[2]<<8 | 527 (uint64_t)p[3]<<16 | 528 (uint64_t)p[4]<<24 | 529 (uint64_t)p[5]<<32 | 530 (uint64_t)p[6]<<40 | 531 (uint64_t)p[7]<<48 | 532 (uint64_t)p[8]<<56; 533 negstart = (uint64_t)1<<63; 534 negmax = UINT64_MAX; 535 } else if (LP_ENCODING_IS_12BIT_STR(p[0])) { 536 *count = LP_ENCODING_12BIT_STR_LEN(p); 537 return p+2; 538 } else if (LP_ENCODING_IS_32BIT_STR(p[0])) { 539 *count = LP_ENCODING_32BIT_STR_LEN(p); 540 return p+5; 541 } else { 542 uval = 12345678900000000ULL + p[0]; 543 negstart = UINT64_MAX; 544 negmax = 0; 545 } 546 547 /* We reach this code path only for integer encodings. 548 * Convert the unsigned value to the signed one using two's complement 549 * rule. */ 550 if (uval >= negstart) { 551 /* This three steps conversion should avoid undefined behaviors 552 * in the unsigned -> signed conversion. */ 553 uval = negmax-uval; 554 val = uval; 555 val = -val-1; 556 } else { 557 val = uval; 558 } 559 560 /* Return the string representation of the integer or the value itself 561 * depending on intbuf being NULL or not. */ 562 if (intbuf) { 563 *count = snprintf((char*)intbuf,LP_INTBUF_SIZE,"%lld",(long long)val); 564 return intbuf; 565 } else { 566 *count = val; 567 return NULL; 568 } 569 } 570 571 /* Insert, delete or replace the specified element 'ele' of length 'len' at 572 * the specified position 'p', with 'p' being a listpack element pointer 573 * obtained with lpFirst(), lpLast(), lpIndex(), lpNext(), lpPrev() or 574 * lpSeek(). 575 * 576 * The element is inserted before, after, or replaces the element pointed 577 * by 'p' depending on the 'where' argument, that can be LP_BEFORE, LP_AFTER 578 * or LP_REPLACE. 579 * 580 * If 'ele' is set to NULL, the function removes the element pointed by 'p' 581 * instead of inserting one. 582 * 583 * Returns NULL on out of memory or when the listpack total length would exceed 584 * the max allowed size of 2^32-1, otherwise the new pointer to the listpack 585 * holding the new element is returned (and the old pointer passed is no longer 586 * considered valid) 587 * 588 * If 'newp' is not NULL, at the end of a successful call '*newp' will be set 589 * to the address of the element just added, so that it will be possible to 590 * continue an interation with lpNext() and lpPrev(). 591 * 592 * For deletion operations ('ele' set to NULL) 'newp' is set to the next 593 * element, on the right of the deleted one, or to NULL if the deleted element 594 * was the last one. */ 595 unsigned char *lpInsert(unsigned char *lp, unsigned char *ele, uint32_t size, unsigned char *p, int where, unsigned char **newp) { 596 unsigned char intenc[LP_MAX_INT_ENCODING_LEN]; 597 unsigned char backlen[LP_MAX_BACKLEN_SIZE]; 598 599 uint64_t enclen; /* The length of the encoded element. */ 600 601 /* An element pointer set to NULL means deletion, which is conceptually 602 * replacing the element with a zero-length element. So whatever we 603 * get passed as 'where', set it to LP_REPLACE. */ 604 if (ele == NULL) where = LP_REPLACE; 605 606 /* If we need to insert after the current element, we just jump to the 607 * next element (that could be the EOF one) and handle the case of 608 * inserting before. So the function will actually deal with just two 609 * cases: LP_BEFORE and LP_REPLACE. */ 610 if (where == LP_AFTER) { 611 p = lpSkip(p); 612 where = LP_BEFORE; 613 } 614 615 /* Store the offset of the element 'p', so that we can obtain its 616 * address again after a reallocation. */ 617 unsigned long poff = p-lp; 618 619 /* Calling lpEncodeGetType() results into the encoded version of the 620 * element to be stored into 'intenc' in case it is representable as 621 * an integer: in that case, the function returns LP_ENCODING_INT. 622 * Otherwise if LP_ENCODING_STR is returned, we'll have to call 623 * lpEncodeString() to actually write the encoded string on place later. 624 * 625 * Whatever the returned encoding is, 'enclen' is populated with the 626 * length of the encoded element. */ 627 int enctype; 628 if (ele) { 629 enctype = lpEncodeGetType(ele,size,intenc,&enclen); 630 } else { 631 enctype = -1; 632 enclen = 0; 633 } 634 635 /* We need to also encode the backward-parsable length of the element 636 * and append it to the end: this allows to traverse the listpack from 637 * the end to the start. */ 638 unsigned long backlen_size = ele ? lpEncodeBacklen(backlen,enclen) : 0; 639 uint64_t old_listpack_bytes = lpGetTotalBytes(lp); 640 uint32_t replaced_len = 0; 641 if (where == LP_REPLACE) { 642 replaced_len = lpCurrentEncodedSize(p); 643 replaced_len += lpEncodeBacklen(NULL,replaced_len); 644 } 645 646 uint64_t new_listpack_bytes = old_listpack_bytes + enclen + backlen_size 647 - replaced_len; 648 if (new_listpack_bytes > UINT32_MAX) return NULL; 649 650 /* We now need to reallocate in order to make space or shrink the 651 * allocation (in case 'when' value is LP_REPLACE and the new element is 652 * smaller). However we do that before memmoving the memory to 653 * make room for the new element if the final allocation will get 654 * larger, or we do it after if the final allocation will get smaller. */ 655 656 unsigned char *dst = lp + poff; /* May be updated after reallocation. */ 657 658 /* Realloc before: we need more room. */ 659 if (new_listpack_bytes > old_listpack_bytes) { 660 if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL; 661 dst = lp + poff; 662 } 663 664 /* Setup the listpack relocating the elements to make the exact room 665 * we need to store the new one. */ 666 if (where == LP_BEFORE) { 667 memmove(dst+enclen+backlen_size,dst,old_listpack_bytes-poff); 668 } else { /* LP_REPLACE. */ 669 long lendiff = (enclen+backlen_size)-replaced_len; 670 memmove(dst+replaced_len+lendiff, 671 dst+replaced_len, 672 old_listpack_bytes-poff-replaced_len); 673 } 674 675 /* Realloc after: we need to free space. */ 676 if (new_listpack_bytes < old_listpack_bytes) { 677 if ((lp = lp_realloc(lp,new_listpack_bytes)) == NULL) return NULL; 678 dst = lp + poff; 679 } 680 681 /* Store the entry. */ 682 if (newp) { 683 *newp = dst; 684 /* In case of deletion, set 'newp' to NULL if the next element is 685 * the EOF element. */ 686 if (!ele && dst[0] == LP_EOF) *newp = NULL; 687 } 688 if (ele) { 689 if (enctype == LP_ENCODING_INT) { 690 memcpy(dst,intenc,enclen); 691 } else { 692 lpEncodeString(dst,ele,size); 693 } 694 dst += enclen; 695 memcpy(dst,backlen,backlen_size); 696 dst += backlen_size; 697 } 698 699 /* Update header. */ 700 if (where != LP_REPLACE || ele == NULL) { 701 uint32_t num_elements = lpGetNumElements(lp); 702 if (num_elements != LP_HDR_NUMELE_UNKNOWN) { 703 if (ele) 704 lpSetNumElements(lp,num_elements+1); 705 else 706 lpSetNumElements(lp,num_elements-1); 707 } 708 } 709 lpSetTotalBytes(lp,new_listpack_bytes); 710 711 #if 0 712 /* This code path is normally disabled: what it does is to force listpack 713 * to return *always* a new pointer after performing some modification to 714 * the listpack, even if the previous allocation was enough. This is useful 715 * in order to spot bugs in code using listpacks: by doing so we can find 716 * if the caller forgets to set the new pointer where the listpack reference 717 * is stored, after an update. */ 718 unsigned char *oldlp = lp; 719 lp = lp_malloc(new_listpack_bytes); 720 memcpy(lp,oldlp,new_listpack_bytes); 721 if (newp) { 722 unsigned long offset = (*newp)-oldlp; 723 *newp = lp + offset; 724 } 725 /* Make sure the old allocation contains garbage. */ 726 memset(oldlp,'A',new_listpack_bytes); 727 lp_free(oldlp); 728 #endif 729 730 return lp; 731 } 732 733 /* Append the specified element 'ele' of length 'len' at the end of the 734 * listpack. It is implemented in terms of lpInsert(), so the return value is 735 * the same as lpInsert(). */ 736 unsigned char *lpAppend(unsigned char *lp, unsigned char *ele, uint32_t size) { 737 uint64_t listpack_bytes = lpGetTotalBytes(lp); 738 unsigned char *eofptr = lp + listpack_bytes - 1; 739 return lpInsert(lp,ele,size,eofptr,LP_BEFORE,NULL); 740 } 741 742 /* Remove the element pointed by 'p', and return the resulting listpack. 743 * If 'newp' is not NULL, the next element pointer (to the right of the 744 * deleted one) is returned by reference. If the deleted element was the 745 * last one, '*newp' is set to NULL. */ 746 unsigned char *lpDelete(unsigned char *lp, unsigned char *p, unsigned char **newp) { 747 return lpInsert(lp,NULL,0,p,LP_REPLACE,newp); 748 } 749 750 /* Return the total number of bytes the listpack is composed of. */ 751 uint32_t lpBytes(unsigned char *lp) { 752 return lpGetTotalBytes(lp); 753 } 754 755 /* Seek the specified element and returns the pointer to the seeked element. 756 * Positive indexes specify the zero-based element to seek from the head to 757 * the tail, negative indexes specify elements starting from the tail, where 758 * -1 means the last element, -2 the penultimate and so forth. If the index 759 * is out of range, NULL is returned. */ 760 unsigned char *lpSeek(unsigned char *lp, long index) { 761 int forward = 1; /* Seek forward by default. */ 762 763 /* We want to seek from left to right or the other way around 764 * depending on the listpack length and the element position. 765 * However if the listpack length cannot be obtained in constant time, 766 * we always seek from left to right. */ 767 uint32_t numele = lpGetNumElements(lp); 768 if (numele != LP_HDR_NUMELE_UNKNOWN) { 769 if (index < 0) index = (long)numele+index; 770 if (index < 0) return NULL; /* Index still < 0 means out of range. */ 771 if (index >= numele) return NULL; /* Out of range the other side. */ 772 /* We want to scan right-to-left if the element we are looking for 773 * is past the half of the listpack. */ 774 if (index > numele/2) { 775 forward = 0; 776 /* Left to right scanning always expects a negative index. Convert 777 * our index to negative form. */ 778 index -= numele; 779 } 780 } else { 781 /* If the listpack length is unspecified, for negative indexes we 782 * want to always scan left-to-right. */ 783 if (index < 0) forward = 0; 784 } 785 786 /* Forward and backward scanning is trivially based on lpNext()/lpPrev(). */ 787 if (forward) { 788 unsigned char *ele = lpFirst(lp); 789 while (index > 0 && ele) { 790 ele = lpNext(lp,ele); 791 index--; 792 } 793 return ele; 794 } else { 795 unsigned char *ele = lpLast(lp); 796 while (index < -1 && ele) { 797 ele = lpPrev(lp,ele); 798 index++; 799 } 800 return ele; 801 } 802 } 803 804