1 /* String -> String Map data structure optimized for size. 2 * This file implements a data structure mapping strings to other strings 3 * implementing an O(n) lookup data structure designed to be very memory 4 * efficient. 5 * 6 * The Redis Hash type uses this data structure for hashes composed of a small 7 * number of elements, to switch to a hash table once a given number of 8 * elements is reached. 9 * 10 * Given that many times Redis Hashes are used to represent objects composed 11 * of few fields, this is a very big win in terms of used memory. 12 * 13 * -------------------------------------------------------------------------- 14 * 15 * Copyright (c) 2009-2010, Salvatore Sanfilippo <antirez at gmail dot com> 16 * All rights reserved. 17 * 18 * Redistribution and use in source and binary forms, with or without 19 * modification, are permitted provided that the following conditions are met: 20 * 21 * * Redistributions of source code must retain the above copyright notice, 22 * this list of conditions and the following disclaimer. 23 * * Redistributions in binary form must reproduce the above copyright 24 * notice, this list of conditions and the following disclaimer in the 25 * documentation and/or other materials provided with the distribution. 26 * * Neither the name of Redis nor the names of its contributors may be used 27 * to endorse or promote products derived from this software without 28 * specific prior written permission. 29 * 30 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 31 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 32 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 33 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 34 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 35 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 36 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 37 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 38 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 39 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 40 * POSSIBILITY OF SUCH DAMAGE. 41 */ 42 43 /* Memory layout of a zipmap, for the map "foo" => "bar", "hello" => "world": 44 * 45 * <zmlen><len>"foo"<len><free>"bar"<len>"hello"<len><free>"world" 46 * 47 * <zmlen> is 1 byte length that holds the current size of the zipmap. 48 * When the zipmap length is greater than or equal to 254, this value 49 * is not used and the zipmap needs to be traversed to find out the length. 50 * 51 * <len> is the length of the following string (key or value). 52 * <len> lengths are encoded in a single value or in a 5 bytes value. 53 * If the first byte value (as an unsigned 8 bit value) is between 0 and 54 * 253, it's a single-byte length. If it is 254 then a four bytes unsigned 55 * integer follows (in the host byte ordering). A value of 255 is used to 56 * signal the end of the hash. 57 * 58 * <free> is the number of free unused bytes after the string, resulting 59 * from modification of values associated to a key. For instance if "foo" 60 * is set to "bar", and later "foo" will be set to "hi", it will have a 61 * free byte to use if the value will enlarge again later, or even in 62 * order to add a key/value pair if it fits. 63 * 64 * <free> is always an unsigned 8 bit number, because if after an 65 * update operation there are more than a few free bytes, the zipmap will be 66 * reallocated to make sure it is as small as possible. 67 * 68 * The most compact representation of the above two elements hash is actually: 69 * 70 * "\x02\x03foo\x03\x00bar\x05hello\x05\x00world\xff" 71 * 72 * Note that because keys and values are prefixed length "objects", 73 * the lookup will take O(N) where N is the number of elements 74 * in the zipmap and *not* the number of bytes needed to represent the zipmap. 75 * This lowers the constant times considerably. 76 */ 77 78 #include <stdio.h> 79 #include <string.h> 80 #include "zmalloc.h" 81 #include "endianconv.h" 82 83 #define ZIPMAP_BIGLEN 254 84 #define ZIPMAP_END 255 85 86 /* The following defines the max value for the <free> field described in the 87 * comments above, that is, the max number of trailing bytes in a value. */ 88 #define ZIPMAP_VALUE_MAX_FREE 4 89 90 /* The following macro returns the number of bytes needed to encode the length 91 * for the integer value _l, that is, 1 byte for lengths < ZIPMAP_BIGLEN and 92 * 5 bytes for all the other lengths. */ 93 #define ZIPMAP_LEN_BYTES(_l) (((_l) < ZIPMAP_BIGLEN) ? 1 : sizeof(unsigned int)+1) 94 95 /* Create a new empty zipmap. */ 96 unsigned char *zipmapNew(void) { 97 unsigned char *zm = zmalloc(2); 98 99 zm[0] = 0; /* Length */ 100 zm[1] = ZIPMAP_END; 101 return zm; 102 } 103 104 /* Decode the encoded length pointed by 'p' */ 105 static unsigned int zipmapDecodeLength(unsigned char *p) { 106 unsigned int len = *p; 107 108 if (len < ZIPMAP_BIGLEN) return len; 109 memcpy(&len,p+1,sizeof(unsigned int)); 110 memrev32ifbe(&len); 111 return len; 112 } 113 114 /* Encode the length 'l' writing it in 'p'. If p is NULL it just returns 115 * the amount of bytes required to encode such a length. */ 116 static unsigned int zipmapEncodeLength(unsigned char *p, unsigned int len) { 117 if (p == NULL) { 118 return ZIPMAP_LEN_BYTES(len); 119 } else { 120 if (len < ZIPMAP_BIGLEN) { 121 p[0] = len; 122 return 1; 123 } else { 124 p[0] = ZIPMAP_BIGLEN; 125 memcpy(p+1,&len,sizeof(len)); 126 memrev32ifbe(p+1); 127 return 1+sizeof(len); 128 } 129 } 130 } 131 132 /* Search for a matching key, returning a pointer to the entry inside the 133 * zipmap. Returns NULL if the key is not found. 134 * 135 * If NULL is returned, and totlen is not NULL, it is set to the entire 136 * size of the zimap, so that the calling function will be able to 137 * reallocate the original zipmap to make room for more entries. */ 138 static unsigned char *zipmapLookupRaw(unsigned char *zm, unsigned char *key, unsigned int klen, unsigned int *totlen) { 139 unsigned char *p = zm+1, *k = NULL; 140 unsigned int l,llen; 141 142 while(*p != ZIPMAP_END) { 143 unsigned char free; 144 145 /* Match or skip the key */ 146 l = zipmapDecodeLength(p); 147 llen = zipmapEncodeLength(NULL,l); 148 if (key != NULL && k == NULL && l == klen && !memcmp(p+llen,key,l)) { 149 /* Only return when the user doesn't care 150 * for the total length of the zipmap. */ 151 if (totlen != NULL) { 152 k = p; 153 } else { 154 return p; 155 } 156 } 157 p += llen+l; 158 /* Skip the value as well */ 159 l = zipmapDecodeLength(p); 160 p += zipmapEncodeLength(NULL,l); 161 free = p[0]; 162 p += l+1+free; /* +1 to skip the free byte */ 163 } 164 if (totlen != NULL) *totlen = (unsigned int)(p-zm)+1; 165 return k; 166 } 167 168 static unsigned long zipmapRequiredLength(unsigned int klen, unsigned int vlen) { 169 unsigned int l; 170 171 l = klen+vlen+3; 172 if (klen >= ZIPMAP_BIGLEN) l += 4; 173 if (vlen >= ZIPMAP_BIGLEN) l += 4; 174 return l; 175 } 176 177 /* Return the total amount used by a key (encoded length + payload) */ 178 static unsigned int zipmapRawKeyLength(unsigned char *p) { 179 unsigned int l = zipmapDecodeLength(p); 180 return zipmapEncodeLength(NULL,l) + l; 181 } 182 183 /* Return the total amount used by a value 184 * (encoded length + single byte free count + payload) */ 185 static unsigned int zipmapRawValueLength(unsigned char *p) { 186 unsigned int l = zipmapDecodeLength(p); 187 unsigned int used; 188 189 used = zipmapEncodeLength(NULL,l); 190 used += p[used] + 1 + l; 191 return used; 192 } 193 194 /* If 'p' points to a key, this function returns the total amount of 195 * bytes used to store this entry (entry = key + associated value + trailing 196 * free space if any). */ 197 static unsigned int zipmapRawEntryLength(unsigned char *p) { 198 unsigned int l = zipmapRawKeyLength(p); 199 return l + zipmapRawValueLength(p+l); 200 } 201 202 static inline unsigned char *zipmapResize(unsigned char *zm, unsigned int len) { 203 zm = zrealloc(zm, len); 204 zm[len-1] = ZIPMAP_END; 205 return zm; 206 } 207 208 /* Set key to value, creating the key if it does not already exist. 209 * If 'update' is not NULL, *update is set to 1 if the key was 210 * already preset, otherwise to 0. */ 211 unsigned char *zipmapSet(unsigned char *zm, unsigned char *key, unsigned int klen, unsigned char *val, unsigned int vlen, int *update) { 212 unsigned int zmlen, offset; 213 unsigned int freelen, reqlen = zipmapRequiredLength(klen,vlen); 214 unsigned int empty, vempty; 215 unsigned char *p; 216 217 freelen = reqlen; 218 if (update) *update = 0; 219 p = zipmapLookupRaw(zm,key,klen,&zmlen); 220 if (p == NULL) { 221 /* Key not found: enlarge */ 222 zm = zipmapResize(zm, zmlen+reqlen); 223 p = zm+zmlen-1; 224 zmlen = zmlen+reqlen; 225 226 /* Increase zipmap length (this is an insert) */ 227 if (zm[0] < ZIPMAP_BIGLEN) zm[0]++; 228 } else { 229 /* Key found. Is there enough space for the new value? */ 230 /* Compute the total length: */ 231 if (update) *update = 1; 232 freelen = zipmapRawEntryLength(p); 233 if (freelen < reqlen) { 234 /* Store the offset of this key within the current zipmap, so 235 * it can be resized. Then, move the tail backwards so this 236 * pair fits at the current position. */ 237 offset = p-zm; 238 zm = zipmapResize(zm, zmlen-freelen+reqlen); 239 p = zm+offset; 240 241 /* The +1 in the number of bytes to be moved is caused by the 242 * end-of-zipmap byte. Note: the *original* zmlen is used. */ 243 memmove(p+reqlen, p+freelen, zmlen-(offset+freelen+1)); 244 zmlen = zmlen-freelen+reqlen; 245 freelen = reqlen; 246 } 247 } 248 249 /* We now have a suitable block where the key/value entry can 250 * be written. If there is too much free space, move the tail 251 * of the zipmap a few bytes to the front and shrink the zipmap, 252 * as we want zipmaps to be very space efficient. */ 253 empty = freelen-reqlen; 254 if (empty >= ZIPMAP_VALUE_MAX_FREE) { 255 /* First, move the tail <empty> bytes to the front, then resize 256 * the zipmap to be <empty> bytes smaller. */ 257 offset = p-zm; 258 memmove(p+reqlen, p+freelen, zmlen-(offset+freelen+1)); 259 zmlen -= empty; 260 zm = zipmapResize(zm, zmlen); 261 p = zm+offset; 262 vempty = 0; 263 } else { 264 vempty = empty; 265 } 266 267 /* Just write the key + value and we are done. */ 268 /* Key: */ 269 p += zipmapEncodeLength(p,klen); 270 memcpy(p,key,klen); 271 p += klen; 272 /* Value: */ 273 p += zipmapEncodeLength(p,vlen); 274 *p++ = vempty; 275 memcpy(p,val,vlen); 276 return zm; 277 } 278 279 /* Remove the specified key. If 'deleted' is not NULL the pointed integer is 280 * set to 0 if the key was not found, to 1 if it was found and deleted. */ 281 unsigned char *zipmapDel(unsigned char *zm, unsigned char *key, unsigned int klen, int *deleted) { 282 unsigned int zmlen, freelen; 283 unsigned char *p = zipmapLookupRaw(zm,key,klen,&zmlen); 284 if (p) { 285 freelen = zipmapRawEntryLength(p); 286 memmove(p, p+freelen, zmlen-((p-zm)+freelen+1)); 287 zm = zipmapResize(zm, zmlen-freelen); 288 289 /* Decrease zipmap length */ 290 if (zm[0] < ZIPMAP_BIGLEN) zm[0]--; 291 292 if (deleted) *deleted = 1; 293 } else { 294 if (deleted) *deleted = 0; 295 } 296 return zm; 297 } 298 299 /* Call before iterating through elements via zipmapNext() */ 300 unsigned char *zipmapRewind(unsigned char *zm) { 301 return zm+1; 302 } 303 304 /* This function is used to iterate through all the zipmap elements. 305 * In the first call the first argument is the pointer to the zipmap + 1. 306 * In the next calls what zipmapNext returns is used as first argument. 307 * Example: 308 * 309 * unsigned char *i = zipmapRewind(my_zipmap); 310 * while((i = zipmapNext(i,&key,&klen,&value,&vlen)) != NULL) { 311 * printf("%d bytes key at $p\n", klen, key); 312 * printf("%d bytes value at $p\n", vlen, value); 313 * } 314 */ 315 unsigned char *zipmapNext(unsigned char *zm, unsigned char **key, unsigned int *klen, unsigned char **value, unsigned int *vlen) { 316 if (zm[0] == ZIPMAP_END) return NULL; 317 if (key) { 318 *key = zm; 319 *klen = zipmapDecodeLength(zm); 320 *key += ZIPMAP_LEN_BYTES(*klen); 321 } 322 zm += zipmapRawKeyLength(zm); 323 if (value) { 324 *value = zm+1; 325 *vlen = zipmapDecodeLength(zm); 326 *value += ZIPMAP_LEN_BYTES(*vlen); 327 } 328 zm += zipmapRawValueLength(zm); 329 return zm; 330 } 331 332 /* Search a key and retrieve the pointer and len of the associated value. 333 * If the key is found the function returns 1, otherwise 0. */ 334 int zipmapGet(unsigned char *zm, unsigned char *key, unsigned int klen, unsigned char **value, unsigned int *vlen) { 335 unsigned char *p; 336 337 if ((p = zipmapLookupRaw(zm,key,klen,NULL)) == NULL) return 0; 338 p += zipmapRawKeyLength(p); 339 *vlen = zipmapDecodeLength(p); 340 *value = p + ZIPMAP_LEN_BYTES(*vlen) + 1; 341 return 1; 342 } 343 344 /* Return 1 if the key exists, otherwise 0 is returned. */ 345 int zipmapExists(unsigned char *zm, unsigned char *key, unsigned int klen) { 346 return zipmapLookupRaw(zm,key,klen,NULL) != NULL; 347 } 348 349 /* Return the number of entries inside a zipmap */ 350 unsigned int zipmapLen(unsigned char *zm) { 351 unsigned int len = 0; 352 if (zm[0] < ZIPMAP_BIGLEN) { 353 len = zm[0]; 354 } else { 355 unsigned char *p = zipmapRewind(zm); 356 while((p = zipmapNext(p,NULL,NULL,NULL,NULL)) != NULL) len++; 357 358 /* Re-store length if small enough */ 359 if (len < ZIPMAP_BIGLEN) zm[0] = len; 360 } 361 return len; 362 } 363 364 /* Return the raw size in bytes of a zipmap, so that we can serialize 365 * the zipmap on disk (or everywhere is needed) just writing the returned 366 * amount of bytes of the C array starting at the zipmap pointer. */ 367 size_t zipmapBlobLen(unsigned char *zm) { 368 unsigned int totlen; 369 zipmapLookupRaw(zm,NULL,0,&totlen); 370 return totlen; 371 } 372 373 #ifdef REDIS_TEST 374 static void zipmapRepr(unsigned char *p) { 375 unsigned int l; 376 377 printf("{status %u}",*p++); 378 while(1) { 379 if (p[0] == ZIPMAP_END) { 380 printf("{end}"); 381 break; 382 } else { 383 unsigned char e; 384 385 l = zipmapDecodeLength(p); 386 printf("{key %u}",l); 387 p += zipmapEncodeLength(NULL,l); 388 if (l != 0 && fwrite(p,l,1,stdout) == 0) perror("fwrite"); 389 p += l; 390 391 l = zipmapDecodeLength(p); 392 printf("{value %u}",l); 393 p += zipmapEncodeLength(NULL,l); 394 e = *p++; 395 if (l != 0 && fwrite(p,l,1,stdout) == 0) perror("fwrite"); 396 p += l+e; 397 if (e) { 398 printf("["); 399 while(e--) printf("."); 400 printf("]"); 401 } 402 } 403 } 404 printf("\n"); 405 } 406 407 #define UNUSED(x) (void)(x) 408 int zipmapTest(int argc, char *argv[]) { 409 unsigned char *zm; 410 411 UNUSED(argc); 412 UNUSED(argv); 413 414 zm = zipmapNew(); 415 416 zm = zipmapSet(zm,(unsigned char*) "name",4, (unsigned char*) "foo",3,NULL); 417 zm = zipmapSet(zm,(unsigned char*) "surname",7, (unsigned char*) "foo",3,NULL); 418 zm = zipmapSet(zm,(unsigned char*) "age",3, (unsigned char*) "foo",3,NULL); 419 zipmapRepr(zm); 420 421 zm = zipmapSet(zm,(unsigned char*) "hello",5, (unsigned char*) "world!",6,NULL); 422 zm = zipmapSet(zm,(unsigned char*) "foo",3, (unsigned char*) "bar",3,NULL); 423 zm = zipmapSet(zm,(unsigned char*) "foo",3, (unsigned char*) "!",1,NULL); 424 zipmapRepr(zm); 425 zm = zipmapSet(zm,(unsigned char*) "foo",3, (unsigned char*) "12345",5,NULL); 426 zipmapRepr(zm); 427 zm = zipmapSet(zm,(unsigned char*) "new",3, (unsigned char*) "xx",2,NULL); 428 zm = zipmapSet(zm,(unsigned char*) "noval",5, (unsigned char*) "",0,NULL); 429 zipmapRepr(zm); 430 zm = zipmapDel(zm,(unsigned char*) "new",3,NULL); 431 zipmapRepr(zm); 432 433 printf("\nLook up large key:\n"); 434 { 435 unsigned char buf[512]; 436 unsigned char *value; 437 unsigned int vlen, i; 438 for (i = 0; i < 512; i++) buf[i] = 'a'; 439 440 zm = zipmapSet(zm,buf,512,(unsigned char*) "long",4,NULL); 441 if (zipmapGet(zm,buf,512,&value,&vlen)) { 442 printf(" <long key> is associated to the %d bytes value: %.*s\n", 443 vlen, vlen, value); 444 } 445 } 446 447 printf("\nPerform a direct lookup:\n"); 448 { 449 unsigned char *value; 450 unsigned int vlen; 451 452 if (zipmapGet(zm,(unsigned char*) "foo",3,&value,&vlen)) { 453 printf(" foo is associated to the %d bytes value: %.*s\n", 454 vlen, vlen, value); 455 } 456 } 457 printf("\nIterate through elements:\n"); 458 { 459 unsigned char *i = zipmapRewind(zm); 460 unsigned char *key, *value; 461 unsigned int klen, vlen; 462 463 while((i = zipmapNext(i,&key,&klen,&value,&vlen)) != NULL) { 464 printf(" %d:%.*s => %d:%.*s\n", klen, klen, key, vlen, vlen, value); 465 } 466 } 467 return 0; 468 } 469 #endif 470